diff --git a/Documentation/Examples/Matrices/CMakeLists.txt b/Documentation/Examples/Matrices/CMakeLists.txt index 710524aaa5bcf756e6814352e6be8d5d0dc296af..8e4f5b37d658d74a13ed3d949bca36a25feacb21 100644 --- a/Documentation/Examples/Matrices/CMakeLists.txt +++ b/Documentation/Examples/Matrices/CMakeLists.txt @@ -1,161 +1,5 @@ -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list_cuda DenseMatrixExample_Constructor_init_list.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out - OUTPUT DenseMatrixExample_Constructor_init_list.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElements_cuda DenseMatrixExample_setElements.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out - OUTPUT DenseMatrixExample_setElements.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths_cuda DenseMatrixExample_getCompressedRowLengths.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out - OUTPUT DenseMatrixExample_getCompressedRowLengths.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElementsCount_cuda DenseMatrixExample_getElementsCount.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out - OUTPUT DenseMatrixExample_getElementsCount.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_getConstRow_cuda DenseMatrixExample_getConstRow.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out - OUTPUT DenseMatrixExample_getConstRow.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_getRow_cuda 
DenseMatrixExample_getRow.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out - OUTPUT DenseMatrixExample_getRow.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElement_cuda DenseMatrixExample_setElement.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out - OUTPUT DenseMatrixExample_setElement.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_addElement_cuda DenseMatrixExample_addElement.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out - OUTPUT DenseMatrixExample_addElement.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElement_cuda DenseMatrixExample_getElement.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out - OUTPUT DenseMatrixExample_getElement.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_cuda DenseMatrixExample_rowsReduction.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out - OUTPUT DenseMatrixExample_rowsReduction.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction_cuda DenseMatrixExample_allRowsReduction.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out - OUTPUT DenseMatrixExample_allRowsReduction.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixExample_forRows_cuda DenseMatrixExample_forRows.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out - OUTPUT DenseMatrixExample_forRows.out ) - - CUDA_ADD_EXECUTABLE( 
DenseMatrixExample_forAllRows_cuda DenseMatrixExample_forAllRows.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out - OUTPUT DenseMatrixExample_forAllRows.out ) - - CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out - OUTPUT DenseMatrixViewExample_constructor.out ) - -ELSE() - ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out - OUTPUT DenseMatrixExample_Constructor_init_list.out ) - - ADD_EXECUTABLE( DenseMatrixExample_setElements DenseMatrixExample_setElements.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out - OUTPUT DenseMatrixExample_setElements.out ) - - ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths DenseMatrixExample_getCompressedRowLengths.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out - OUTPUT DenseMatrixExample_getCompressedRowLengths.out ) - - ADD_EXECUTABLE( DenseMatrixExample_getElementsCount DenseMatrixExample_getElementsCount.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out - OUTPUT DenseMatrixExample_getElementsCount.out ) - - ADD_EXECUTABLE( DenseMatrixExample_getConstRow DenseMatrixExample_getConstRow.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow > - 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out - OUTPUT DenseMatrixExample_getConstRow.out ) - - ADD_EXECUTABLE( DenseMatrixExample_getRow DenseMatrixExample_getRow.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out - OUTPUT DenseMatrixExample_getRow.out ) - - ADD_EXECUTABLE( DenseMatrixExample_setElement DenseMatrixExample_setElement.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out - OUTPUT DenseMatrixExample_setElement.out ) - - ADD_EXECUTABLE( DenseMatrixExample_addElement DenseMatrixExample_addElement.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out - OUTPUT DenseMatrixExample_addElement.out ) - - ADD_EXECUTABLE( DenseMatrixExample_getElement DenseMatrixExample_getElement.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out - OUTPUT DenseMatrixExample_getElement.out ) - - ADD_EXECUTABLE( DenseMatrixExample_rowsReduction DenseMatrixExample_rowsReduction.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out - OUTPUT DenseMatrixExample_rowsReduction.out ) - - ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction DenseMatrixExample_allRowsReduction.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out - OUTPUT DenseMatrixExample_allRowsReduction.out ) - - ADD_EXECUTABLE( DenseMatrixExample_forRows DenseMatrixExample_forRows.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out - OUTPUT 
DenseMatrixExample_forRows.out ) - - ADD_EXECUTABLE( DenseMatrixExample_forAllRows DenseMatrixExample_forAllRows.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out - OUTPUT DenseMatrixExample_forAllRows.out ) - - ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp ) - ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor > - ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out - OUTPUT DenseMatrixViewExample_constructor.out ) - -ENDIF() - -ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS - DenseMatrixExample_Constructor_init_list.out - DenseMatrixExample_setElements.out - DenseMatrixExample_getCompressedRowLengths.out - DenseMatrixExample_getElementsCount.out - DenseMatrixExample_getConstRow.out - DenseMatrixExample_getRow.out - DenseMatrixExample_setElement.out - DenseMatrixExample_addElement.out - DenseMatrixExample_getElement.out - DenseMatrixExample_rowsReduction.out - DenseMatrixExample_allRowsReduction.out - DenseMatrixExample_forRows.out - DenseMatrixExample_forAllRows.out - DenseMatrixViewExample_constructor.out -) - +ADD_SUBDIRECTORY( DenseMatrix ) +ADD_SUBDIRECTORY( LambdaMatrix ) +ADD_SUBDIRECTORY( MultidiagonalMatrix ) +ADD_SUBDIRECTORY( SparseMatrix ) +ADD_SUBDIRECTORY( TridiagonalMatrix ) diff --git a/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..156b19dba5c25b19be76a2c401098084fbcf33c4 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/CMakeLists.txt @@ -0,0 +1,283 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list_cuda DenseMatrixExample_Constructor_init_list.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list_cuda > + 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out + OUTPUT DenseMatrixExample_Constructor_init_list.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElements_cuda DenseMatrixExample_setElements.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out + OUTPUT DenseMatrixExample_setElements.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths_cuda DenseMatrixExample_getCompressedRowLengths.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out + OUTPUT DenseMatrixExample_getCompressedRowLengths.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElementsCount_cuda DenseMatrixExample_getElementsCount.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out + OUTPUT DenseMatrixExample_getElementsCount.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_getConstRow_cuda DenseMatrixExample_getConstRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out + OUTPUT DenseMatrixExample_getConstRow.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_getRow_cuda DenseMatrixExample_getRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out + OUTPUT DenseMatrixExample_getRow.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_setElement_cuda DenseMatrixExample_setElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out + OUTPUT DenseMatrixExample_setElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_addElement_cuda 
DenseMatrixExample_addElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out + OUTPUT DenseMatrixExample_addElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_getElement_cuda DenseMatrixExample_getElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out + OUTPUT DenseMatrixExample_getElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_rowsReduction_cuda DenseMatrixExample_rowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out + OUTPUT DenseMatrixExample_rowsReduction.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction_cuda DenseMatrixExample_allRowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out + OUTPUT DenseMatrixExample_allRowsReduction.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_forRows_cuda DenseMatrixExample_forRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out + OUTPUT DenseMatrixExample_forRows.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixExample_forAllRows_cuda DenseMatrixExample_forAllRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out + OUTPUT DenseMatrixExample_forAllRows.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out + OUTPUT DenseMatrixViewExample_constructor.out ) + + 
CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getCompressedRowLengths_cuda DenseMatrixViewExample_getCompressedRowLengths.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getCompressedRowLengths_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getCompressedRowLengths.out + OUTPUT DenseMatrixViewExample_getCompressedRowLengths.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getElementsCount_cuda DenseMatrixViewExample_getElementsCount.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElementsCount_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElementsCount.out + OUTPUT DenseMatrixViewExample_getElementsCount.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getConstRow_cuda DenseMatrixViewExample_getConstRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getConstRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getConstRow.out + OUTPUT DenseMatrixViewExample_getConstRow.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getRow_cuda DenseMatrixViewExample_getRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getRow.out + OUTPUT DenseMatrixViewExample_getRow.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_setElement_cuda DenseMatrixViewExample_setElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_setElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_setElement.out + OUTPUT DenseMatrixViewExample_setElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_addElement_cuda DenseMatrixViewExample_addElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_addElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_addElement.out + OUTPUT DenseMatrixViewExample_addElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_getElement_cuda 
DenseMatrixViewExample_getElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElement.out + OUTPUT DenseMatrixViewExample_getElement.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_rowsReduction_cuda DenseMatrixViewExample_rowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_rowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_rowsReduction.out + OUTPUT DenseMatrixViewExample_rowsReduction.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_allRowsReduction_cuda DenseMatrixViewExample_allRowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_allRowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_allRowsReduction.out + OUTPUT DenseMatrixViewExample_allRowsReduction.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_forRows_cuda DenseMatrixViewExample_forRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forRows.out + OUTPUT DenseMatrixViewExample_forRows.out ) + + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_forAllRows_cuda DenseMatrixViewExample_forAllRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forAllRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forAllRows.out + OUTPUT DenseMatrixViewExample_forAllRows.out ) + +ELSE() + ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_Constructor_init_list.out + OUTPUT DenseMatrixExample_Constructor_init_list.out ) + + ADD_EXECUTABLE( DenseMatrixExample_setElements DenseMatrixExample_setElements.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElements > + 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElements.out + OUTPUT DenseMatrixExample_setElements.out ) + + ADD_EXECUTABLE( DenseMatrixExample_getCompressedRowLengths DenseMatrixExample_getCompressedRowLengths.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getCompressedRowLengths > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getCompressedRowLengths.out + OUTPUT DenseMatrixExample_getCompressedRowLengths.out ) + + ADD_EXECUTABLE( DenseMatrixExample_getElementsCount DenseMatrixExample_getElementsCount.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElementsCount > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElementsCount.out + OUTPUT DenseMatrixExample_getElementsCount.out ) + + ADD_EXECUTABLE( DenseMatrixExample_getConstRow DenseMatrixExample_getConstRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getConstRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getConstRow.out + OUTPUT DenseMatrixExample_getConstRow.out ) + + ADD_EXECUTABLE( DenseMatrixExample_getRow DenseMatrixExample_getRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getRow.out + OUTPUT DenseMatrixExample_getRow.out ) + + ADD_EXECUTABLE( DenseMatrixExample_setElement DenseMatrixExample_setElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_setElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_setElement.out + OUTPUT DenseMatrixExample_setElement.out ) + + ADD_EXECUTABLE( DenseMatrixExample_addElement DenseMatrixExample_addElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_addElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_addElement.out + OUTPUT DenseMatrixExample_addElement.out ) + + ADD_EXECUTABLE( DenseMatrixExample_getElement DenseMatrixExample_getElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_getElement > + 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_getElement.out + OUTPUT DenseMatrixExample_getElement.out ) + + ADD_EXECUTABLE( DenseMatrixExample_rowsReduction DenseMatrixExample_rowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_rowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_rowsReduction.out + OUTPUT DenseMatrixExample_rowsReduction.out ) + + ADD_EXECUTABLE( DenseMatrixExample_allRowsReduction DenseMatrixExample_allRowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_allRowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_allRowsReduction.out + OUTPUT DenseMatrixExample_allRowsReduction.out ) + + ADD_EXECUTABLE( DenseMatrixExample_forRows DenseMatrixExample_forRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forRows.out + OUTPUT DenseMatrixExample_forRows.out ) + + ADD_EXECUTABLE( DenseMatrixExample_forAllRows DenseMatrixExample_forAllRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_forAllRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out + OUTPUT DenseMatrixExample_forAllRows.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out + OUTPUT DenseMatrixViewExample_constructor.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_getCompressedRowLengths DenseMatrixViewExample_getCompressedRowLengths.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getCompressedRowLengths > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getCompressedRowLengths.out + OUTPUT DenseMatrixViewExample_getCompressedRowLengths.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_getElementsCount DenseMatrixViewExample_getElementsCount.cpp ) + 
ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElementsCount > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElementsCount.out + OUTPUT DenseMatrixViewExample_getElementsCount.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_getConstRow DenseMatrixViewExample_getConstRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getConstRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getConstRow.out + OUTPUT DenseMatrixViewExample_getConstRow.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_getRow DenseMatrixViewExample_getRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getRow.out + OUTPUT DenseMatrixViewExample_getRow.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_setElement DenseMatrixViewExample_setElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_setElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_setElement.out + OUTPUT DenseMatrixViewExample_setElement.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_addElement DenseMatrixViewExample_addElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_addElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_addElement.out + OUTPUT DenseMatrixViewExample_addElement.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_getElement DenseMatrixViewExample_getElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_getElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_getElement.out + OUTPUT DenseMatrixViewExample_getElement.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_rowsReduction DenseMatrixViewExample_rowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_rowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_rowsReduction.out + OUTPUT DenseMatrixViewExample_rowsReduction.out ) + + ADD_EXECUTABLE( 
DenseMatrixViewExample_allRowsReduction DenseMatrixViewExample_allRowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_allRowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_allRowsReduction.out + OUTPUT DenseMatrixViewExample_allRowsReduction.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_forRows DenseMatrixViewExample_forRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forRows.out + OUTPUT DenseMatrixViewExample_forRows.out ) + + ADD_EXECUTABLE( DenseMatrixViewExample_forAllRows DenseMatrixViewExample_forAllRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_forAllRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_forAllRows.out + OUTPUT DenseMatrixViewExample_forAllRows.out ) + +ENDIF() + +ADD_CUSTOM_TARGET( RunDenseMatricesExamples ALL DEPENDS + DenseMatrixExample_Constructor_init_list.out + DenseMatrixExample_setElements.out + DenseMatrixExample_getCompressedRowLengths.out + DenseMatrixExample_getElementsCount.out + DenseMatrixExample_getConstRow.out + DenseMatrixExample_getRow.out + DenseMatrixExample_setElement.out + DenseMatrixExample_addElement.out + DenseMatrixExample_getElement.out + DenseMatrixExample_rowsReduction.out + DenseMatrixExample_allRowsReduction.out + DenseMatrixExample_forRows.out + DenseMatrixExample_forAllRows.out + DenseMatrixViewExample_constructor.out + DenseMatrixViewExample_getCompressedRowLengths.out + DenseMatrixViewExample_getElementsCount.out + DenseMatrixViewExample_getConstRow.out + DenseMatrixViewExample_getRow.out + DenseMatrixViewExample_setElement.out + DenseMatrixViewExample_addElement.out + DenseMatrixViewExample_getElement.out + DenseMatrixViewExample_rowsReduction.out + DenseMatrixViewExample_allRowsReduction.out + DenseMatrixViewExample_forRows.out + DenseMatrixViewExample_forAllRows.out + +) + diff --git 
a/Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_Constructor_init_list.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp similarity index 80% rename from Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp index 32e39e6a3ec5fb55618ed54523db22f34ed0ebbd..4a36b007e442a492fad1944f01d8d6cc6c1e3294 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp @@ -9,13 +9,13 @@ void addElements() for( int i = 0; i < 5; i++ ) matrix.setElement( i, i, i ); - std::cout << "Initial matrix is: " << matrix << std::endl; + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; for( int i = 0; i < 5; i++ ) for( int j = 0; j < 5; j++ ) matrix.addElement( i, j, 1.0, 5.0 ); - std::cout << "Matrix after addition is: " << matrix << std::endl; + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; } int main( int argc, char* argv[] ) diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_addElement.cu 
b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_addElement.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_addElement.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_allRowsReduction.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp similarity index 94% rename from Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp index 5fddf0f34ff789591802da01c143c3600baeadf3..e218db69022f8f4b43df7d6956fbc6afb0cfde00 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp @@ -8,7 +8,7 @@ void forAllRowsExample() { TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); - auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) { + auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) { if( rowIdx < columnIdx ) compute = false; else diff --git 
a/Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_forAllRows.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp similarity index 94% rename from Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp index f3e45a006adbe3f73ec5f37b89afc7f0aed81cce..f98c580fdf36ff6c2d0a13d12f35d4128970310a 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp @@ -8,7 +8,7 @@ void forRowsExample() { TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); - auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) { + auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int columnIdx_, double& value, bool& compute ) { if( rowIdx < columnIdx ) compute = false; else diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_forRows.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_forRows.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cpp rename to 
Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getCompressedRowLengths.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getConstRow.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp similarity index 89% rename from Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp index 72a5d0af44682b84c8e503c5bcc561666eb43088..7bd089ed8ade872dc6001ba97fec05659e95e2f5 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp @@ -17,7 +17,7 @@ void getElements() for( int i = 0; i < 5; i++ ) { for( int j = 0; j < 5; j++ ) - std::cout << std::setw( 5 ) << std::ios::right << matrix.getElement( i, i ); + std::cout << std::setw( 5 ) << 
matrix.getElement( i, j ); std::cout << std::endl; } } diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getElement.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElement.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp similarity index 94% rename from Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp index a95fa00e77e9f6b40de672a21931005c17862549..0a4a7bb7bb30ae92cb3464a4e8db8a9de1940fe4 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp @@ -14,7 +14,7 @@ void getElementsCountExample() { 11, 12, 13, 14, 15 } }; - std::cout << "Matrix elements count is " << triangularMatrix.getElementsCount() << "." << std::endl; + std::cout << "Matrix elements count is " << triangularMatrix.getAllocatedElementsCount() << "." << std::endl; std::cout << "Non-zero matrix elements count is " << triangularMatrix.getNonzeroElementsCount() << "." 
<< std::endl; } diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getElementsCount.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cu diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu new file mode 120000 index 0000000000000000000000000000000000000000..045fa3c1b11ffaf2bcad06b46462823230cf80ac --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getNonzeroElementsCount.cu @@ -0,0 +1 @@ +DenseMatrixExample_getNonzeroElementsCount.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp similarity index 95% rename from Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp index 00a6b11192a5f7fdedfc5964db674ed5fc4c05b7..0cf1e1d76d938a32b95b8702d659d47025851998 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp @@ -13,7 +13,7 @@ void getRowExample() auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrix->getRow( rowIdx ); - row.setElement( rowIdx, 10* ( rowIdx + 1 ) ); + row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; /*** diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_getRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_getRow.cu rename to 
Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_getRow.cu diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b521d15d26cc789c6f3f8b6c32ead723d35ac1f4 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix { + { 1, 0, 0, 0, 0 }, + { 1, 2, 0, 0, 0 }, + { 0, 1, 8, 0, 0 }, + { 0, 0, 1, 9, 0 }, + { 0, 0, 0, 0, 1 } }; + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp similarity index 82% rename from Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp index 4f92496f83fab014758a616aee69fa50497b0037..9441cc60d8418030bf9cd6951483a726532d85f0 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -18,12 +19,17 @@ void setElements() auto f = [=] __cuda_callable__ ( int i ) mutable { matrix->setElement( i, i, -i ); }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use DenseMatrixView. See + * DenseMatrixView::getRow example for details. 
+ */ TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); std::cout << "Matrix set from its native device:" << std::endl; std::cout << *matrix << std::endl; - } int main( int argc, char* argv[] ) diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_setElement.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElement.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_setElements.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElements.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_setElements.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixExample_setElements.cu diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bc72a6241037545c88d675a4d08d32f375a9a8d6 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + auto matrixView = matrix.getView(); + + for( int i = 0; i < 5; i++ ) + matrixView.setElement( i, i, i ); + + std::cout << "Initial matrix 
is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < 5; i++ ) + for( int j = 0; j < 5; j++ ) + matrixView.addElement( i, j, 1.0, 5.0 ); + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..9266b5e73d4f3a375a42070b53544e30d6ff6ded --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixViewExample_allRowsReduction.cu rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cu diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp similarity index 56% rename from 
Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp index e8c036fa4d325bfd2eab7144eebae7337fc23ad5..d180caa518bbaaf05d3f245006c5a58637196919 100644 --- a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp @@ -12,19 +12,19 @@ void createMatrixView() 5, 6, 7, 8, 9, 10, 11, 12 }; - TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > matrix( 5, 5, values.getView() ); - /*** - * We need a matrix view to pass the matrix to lambda function even on CUDA device. + * Create dense matrix view with row major order */ - /*auto matrixView = matrix.getView(); - auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { - auto row = matrixView.getRow( rowIdx ); - row.setElement( rowIdx, 10* ( rowIdx + 1 ) ); - }; + TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > rowMajorMatrix( 3, 4, values.getView() ); + std::cout << "Row major order matrix:" << std::endl; + std::cout << rowMajorMatrix << std::endl; - TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); - std::cout << matrix << std::endl;*/ + /*** + * Create dense matrix view with column major order + */ + TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::ColumnMajorOrder > columnMajorMatrix( 4, 3, values.getView() ); + std::cout << "Column major order matrix:" << std::endl; + std::cout << columnMajorMatrix << std::endl; } int main( int argc, char* argv[] ) diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cu similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu rename to 
Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cu diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3c51e8ee5cb697af4de70f217e833001d852ab73 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +template< typename Device > +void forAllRowsExample() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + auto matrixView = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) { + if( rowIdx < columnIdx ) + compute = false; + else + value = rowIdx + columnIdx; + }; + + matrixView.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forAllRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forAllRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..6b0114a09af2b8cdf504f518df9173935a71054b --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..810bf11186d26c707ce6138beac7467c5b44c97b 
--- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + auto matrixView = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int columnIdx, int globalIdx, double& value, bool& compute ) { + if( rowIdx < columnIdx ) + compute = false; + else + value = rowIdx + columnIdx; + }; + + matrixView.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..8111505a3bafe0c6aaad3434405418d628efeb90 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4572f41a38104aab7297d99a7e65920d351c7dfe --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +template< typename Device > +void getCompressedRowLengthsExample() +{ + TNL::Matrices::DenseMatrix< double, Device > triangularMatrix { + { 1 }, + { 2, 3 }, + { 4, 5, 6 }, 
+ { 7, 8, 9, 10 }, + { 11, 12, 13, 14, 15 } + }; + auto triangularMatrixView = triangularMatrix.getConstView(); + + std::cout << triangularMatrixView << std::endl; + + TNL::Containers::Vector< int, Device > rowLengths; + triangularMatrixView.getCompressedRowLengths( rowLengths ); + + std::cout << "Compressed row lengths are: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting compressed row lengths on host: " << std::endl; + getCompressedRowLengthsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting compressed row lengths on CUDA device: " << std::endl; + getCompressedRowLengthsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..d9f9713a8c6ec97306ca86d804bcd0eeae57105b --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixViewExample_getConstRow.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..facd73fa1735401206d57fc286d9c1f6851ef7b4 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cu @@ -0,0 +1 @@ 
+DenseMatrixViewExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f4be000b903246e206963038d125a009bbbd568 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix { + { 1, 0, 0, 0, 0 }, + { -1, 2, -1, 0, 0 }, + { 0, -1, 2, -1, 0 }, + { 0, 0, -1, 2, -1 }, + { 0, 0, 0, 0, 1 } }; + auto matrixView = matrix.getConstView(); + + + for( int i = 0; i < 5; i++ ) + { + for( int j = 0; j < 5; j++ ) + std::cout << std::setw( 5 ) << matrixView.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..e2a308ee52189892fb9dbc64425a69854122fba8 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..04566eb27ba424040954a55e5e6ddc30859467b2 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +template< typename Device > +void getElementsCountExample() +{ + TNL::Matrices::DenseMatrix< double, Device > triangularMatrix { + { 1 }, + { 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9, 10 }, + { 11, 12, 13, 14, 15 } + }; + auto triangularMatrixView = triangularMatrix.getConstView(); + + std::cout << "Matrix elements count is " << triangularMatrixView.getAllocatedElementsCount() << "." << std::endl; + std::cout << "Non-zero matrix elements count is " << triangularMatrixView.getNonzeroElementsCount() << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Computing matrix elements on host: " << std::endl; + getElementsCountExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Computing matrix elements on CUDA device: " << std::endl; + getElementsCountExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu new file mode 120000 index 0000000000000000000000000000000000000000..b5d2bcebd60b9ff56fbe83a7095864dd10cff83c --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_getElementsCount.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp similarity index 81% rename from Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp index 
30d893bc18ebc27f35c55566b900cb506084122c..8142599c14c0aa6f810f55ba3ff0d24e21c761de 100644 --- a/Documentation/Examples/Matrices/DenseMatrixViewExample_getRow.cpp +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp @@ -10,14 +10,19 @@ void getRowExample() TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); /*** - * We need a matrix view to pass the matrix to lambda function even on CUDA device. + * Create dense matrix view which can be captured by the following lambda + * function. */ auto matrixView = matrix.getView(); + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); - row.setElement( rowIdx, 10* ( rowIdx + 1 ) ); + row.setElement( rowIdx, 10 * ( rowIdx + 1 ) ); }; + /*** + * Set the matrix elements. + */ TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); std::cout << matrix << std::endl; } @@ -31,4 +36,4 @@ int main( int argc, char* argv[] ) std::cout << "Getting matrix rows on CUDA device: " << std::endl; getRowExample< TNL::Devices::Cuda >(); #endif -} \ No newline at end of file +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..5af514505570549792f25111233876e7e7db3816 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp similarity index 100% rename from Documentation/Examples/Matrices/DenseMatrixExample_rowsReduction.cpp rename to Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp diff --git 
a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..4170aaa628a8965768169b1da468517430143990 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..92985bc5aafc465277d2c571a20d7f64391d6357 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + auto matrixView = matrix.getView(); + for( int i = 0; i < 5; i++ ) + matrixView.setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + matrixView.setElement( i, i, -i ); + }; + TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Set elements on host:" << std::endl; + setElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Set elements on CUDA device:" << std::endl; + setElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu new file mode 120000 index 
0000000000000000000000000000000000000000..902de92255413ee4789d85480658bb84abb8dda5 --- /dev/null +++ b/Documentation/Examples/Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cu @@ -0,0 +1 @@ +DenseMatrixViewExample_setElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6315309b2362e822e35569316331e819e115566d --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/CMakeLists.txt @@ -0,0 +1,69 @@ +ADD_EXECUTABLE( LambdaMatrixExample_Constructor LambdaMatrixExample_Constructor.cpp ) +ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Constructor > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Constructor.out + OUTPUT LambdaMatrixExample_Constructor.out ) + +ADD_EXECUTABLE( LambdaMatrixExample_getCompressedRowLengths LambdaMatrixExample_getCompressedRowLengths.cpp ) +ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getCompressedRowLengths > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getCompressedRowLengths.out + OUTPUT LambdaMatrixExample_getCompressedRowLengths.out ) + +ADD_EXECUTABLE( LambdaMatrixExample_getNonzeroElementsCount LambdaMatrixExample_getNonzeroElementsCount.cpp ) +ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getNonzeroElementsCount > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getNonzeroElementsCount.out + OUTPUT LambdaMatrixExample_getNonzeroElementsCount.out ) + + +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction_cuda LambdaMatrixExample_rowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out + OUTPUT LambdaMatrixExample_rowsReduction.out ) + + CUDA_ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction_cuda LambdaMatrixExample_allRowsReduction.cu ) + 
ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out + OUTPUT LambdaMatrixExample_allRowsReduction.out ) + + CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forRows_cuda LambdaMatrixExample_forRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forRows.out + OUTPUT LambdaMatrixExample_forRows.out ) + + CUDA_ADD_EXECUTABLE( LambdaMatrixExample_forAllRows_cuda LambdaMatrixExample_forAllRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forAllRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forAllRows.out + OUTPUT LambdaMatrixExample_forAllRows.out ) + +ELSE() + ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction LambdaMatrixExample_rowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out + OUTPUT LambdaMatrixExample_rowsReduction.out ) + + ADD_EXECUTABLE( LambdaMatrixExample_allRowsReduction LambdaMatrixExample_allRowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_allRowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_allRowsReduction.out + OUTPUT LambdaMatrixExample_allRowsReduction.out ) + + ADD_EXECUTABLE( LambdaMatrixExample_forRows LambdaMatrixExample_forRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forRows.out + OUTPUT LambdaMatrixExample_forRows.out ) + + ADD_EXECUTABLE( LambdaMatrixExample_forAllRows LambdaMatrixExample_forAllRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_forAllRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_forAllRows.out + OUTPUT LambdaMatrixExample_forAllRows.out ) +ENDIF() + +ADD_CUSTOM_TARGET( RunLambdaMatricesExamples ALL DEPENDS + 
LambdaMatrixExample_Constructor.out + LambdaMatrixExample_getCompressedRowLengths.out + LambdaMatrixExample_getNonzeroElementsCount.out + LambdaMatrixExample_rowsReduction.out + LambdaMatrixExample_allRowsReduction.out + LambdaMatrixExample_forRows.out + LambdaMatrixExample_forAllRows.out +) + diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2c418dd5407db95fa75a3ea49c9664db4be19fa2 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp @@ -0,0 +1,35 @@ +#include +#include + +int main( int argc, char* argv[] ) +{ + /*** + * Lambda functions defining the matrix. + */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return 1; }; + auto matrixElements1 = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = rowIdx; + value = 1.0; + }; + auto matrixElements2 = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = rowIdx; + value = rowIdx; + }; + + const int size = 5; + + /*** + * Matrix construction with explicit type definition. + */ + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >::create( matrixElements1, rowLengths ) ); + MatrixType m1( size, size, matrixElements1, rowLengths ); + + /*** + * Matrix construction using 'auto'. 
+ */ + auto m2 = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >::create( matrixElements2, rowLengths ); + m2.setDimensions( size, size ); + + std::cout << "The first lambda matrix: " << std::endl << m1 << std::endl; + std::cout << "The second lambda matrix: " << std::endl << m2 << std::endl; +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a39a1f33b339aeeb9420612009bcd0080ffafb44 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void allRowsReduction() +{ + /*** + * Lambda functions defining the matrix. + */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >; + auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths ); + + /*** + * Allocate the vector that will receive the largest element of each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare a view of the vector rowMax to be captured by the lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * The fetch lambda just returns the absolute value of a matrix element. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. 
+ */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * The keep lambda stores the largest value of each row into the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest value in each row of the matrix. + */ + matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "All rows reduction on host:" << std::endl; + allRowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "All rows reduction on CUDA device:" << std::endl; + allRowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..f76fba15c56a9d0c0f26f605c6745b2e3cd28da3 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cu @@ -0,0 +1 @@ +LambdaMatrixExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..72ff9610141cdcc702bfc25128d37fbff2eec423 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Lambda functions defining the matrix. 
+ */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >; + auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths ); + + TNL::Matrices::DenseMatrix< double, Device > denseMatrix( 5, 5 ); + auto denseView = denseMatrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double value, bool& compute ) mutable { + denseView.setElement( rowIdx, columnIdx, value ); + }; + + matrix.forAllRows( f ); + std::cout << "Original lambda matrix:" << std::endl << matrix << std::endl; + std::cout << "Dense matrix:" << std::endl << denseMatrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Copying matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Copying matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..fef2d377766da09f511f8678ad4bc5fa9050a44d --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cu @@ -0,0 +1 @@ +LambdaMatrixExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..d5cf660a6297bc453b241d8b231942d9fa55c258 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Lambda functions defining the matrix. + */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >; + auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths ); + + TNL::Matrices::DenseMatrix< double, Device > denseMatrix( 5, 5 ); + auto denseView = denseMatrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double value, bool& compute ) mutable { + denseView.setElement( rowIdx, columnIdx, value ); + }; + + matrix.forRows( 0, matrix.getRows(), f ); + std::cout << "Original lambda matrix:" << std::endl << matrix << std::endl; + std::cout << "Dense matrix:" << std::endl << denseMatrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Copying matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Copying matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..6df275619c15af4f43617de7d068083cf4028590 --- /dev/null +++ 
b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cu @@ -0,0 +1 @@ +LambdaMatrixExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..797a0bbbe7c050f71a5cdb2cdbcbdfe1b8423d66 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp @@ -0,0 +1,23 @@ +#include +#include + +int main( int argc, char* argv[] ) +{ + /*** + * Lambda functions defining the matrix. + */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + const int size = 5; + auto matrix = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >::create( size, size, matrixElements, rowLengths ); + + TNL::Containers::Vector< int > rowLengthsVector; + matrix.getCompressedRowLengths( rowLengthsVector ); + + std::cout << "Matrix looks as:" << std::endl << matrix << std::endl; + std::cout << "Compressed row lengths are: " << rowLengthsVector << std::endl; +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..03568be58ee58bb4f560ad98ed60f8d63afd4cad --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +LambdaMatrixExample_getCompressedRowLengths.cpp \ No newline at end of 
file diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp new file mode 100644 index 0000000000000000000000000000000000000000..43f015c48871995e450d9f5e953f4c9a411ecbf7 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_getNonzeroElementsCount.cpp @@ -0,0 +1,20 @@ +#include +#include + +int main( int argc, char* argv[] ) +{ + /*** + * Lambda functions defining the matrix. + */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + const int size = 5; + auto matrix = TNL::Matrices::LambdaMatrixFactory< double, TNL::Devices::Host, int >::create( size, size, matrixElements, rowLengths ); + + std::cout << "Matrix looks as:" << std::endl << matrix << std::endl; + std::cout << "Non-zero elements count is: " << matrix.getNonzeroElementsCount() << std::endl; +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..17f3ace0d64ec2f95b4c20f28ec2609c2a36f3f7 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Lambda functions defining the matrix. 
+ */ + auto rowLengths = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx ) -> int { return columns; }; + auto matrixElements = [=] __cuda_callable__ ( const int rows, const int columns, const int rowIdx, const int localIdx, int& columnIdx, double& value ) { + columnIdx = localIdx; + value = TNL::max( rowIdx - columnIdx + 1, 0 ); + }; + + using MatrixFactory = TNL::Matrices::LambdaMatrixFactory< double, Device, int >; + auto matrix = MatrixFactory::create( 5, 5, matrixElements, rowLengths ); + + /*** + * Allocate the vector that will receive the largest element of each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare a view of the vector rowMax to be captured by the lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * The fetch lambda just returns the absolute value of a matrix element. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * The reduce lambda returns the maximum of the two given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * The keep lambda stores the largest value of each row into the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest value in each row; the row range passed is 0 to matrix.getRows(). + */ + matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..ecb2401ec9f6a98146b2320d6e6d21de4580bd07 --- /dev/null +++ b/Documentation/Examples/Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cu @@ -0,0 +1 @@ +LambdaMatrixExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..10a1ed7329f139deb785a313189abc39b092e02d --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/CMakeLists.txt @@ -0,0 +1,290 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_cuda MultidiagonalMatrixExample_Constructor.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor.out + OUTPUT MultidiagonalMatrixExample_Constructor.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_1_cuda MultidiagonalMatrixExample_Constructor_init_list_1.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_1_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_1.out + OUTPUT MultidiagonalMatrixExample_Constructor_init_list_1.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_2_cuda 
MultidiagonalMatrixExample_Constructor_init_list_2.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_2_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_2.out + OUTPUT MultidiagonalMatrixExample_Constructor_init_list_2.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getSerializationType_cuda MultidiagonalMatrixExample_getSerializationType.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getSerializationType_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getSerializationType.out + OUTPUT MultidiagonalMatrixExample_getSerializationType.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_setElements_cuda MultidiagonalMatrixExample_setElements.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElements_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElements.out + OUTPUT MultidiagonalMatrixExample_setElements.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getCompressedRowLengths_cuda MultidiagonalMatrixExample_getCompressedRowLengths.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getCompressedRowLengths_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getCompressedRowLengths.out + OUTPUT MultidiagonalMatrixExample_getCompressedRowLengths.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getConstRow_cuda MultidiagonalMatrixExample_getConstRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getConstRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getConstRow.out + OUTPUT MultidiagonalMatrixExample_getConstRow.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getRow_cuda MultidiagonalMatrixExample_getRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getRow.out + OUTPUT 
MultidiagonalMatrixExample_getRow.out ) + +# This example does not work with nvcc 10.1. Restore it here when it works. +# CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement_cuda MultidiagonalMatrixExample_setElement.cu ) +# ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement_cuda > +# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out +# OUTPUT MultidiagonalMatrixExample_setElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_addElement_cuda MultidiagonalMatrixExample_addElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_addElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_addElement.out + OUTPUT MultidiagonalMatrixExample_addElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_getElement_cuda MultidiagonalMatrixExample_getElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getElement.out + OUTPUT MultidiagonalMatrixExample_getElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_rowsReduction_cuda MultidiagonalMatrixExample_rowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_rowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_rowsReduction.out + OUTPUT MultidiagonalMatrixExample_rowsReduction.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_allRowsReduction_cuda MultidiagonalMatrixExample_allRowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_allRowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_allRowsReduction.out + OUTPUT MultidiagonalMatrixExample_allRowsReduction.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_forRows_cuda MultidiagonalMatrixExample_forRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forRows_cuda > + 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forRows.out + OUTPUT MultidiagonalMatrixExample_forRows.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixExample_forAllRows_cuda MultidiagonalMatrixExample_forAllRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forAllRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forAllRows.out + OUTPUT MultidiagonalMatrixExample_forAllRows.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getCompressedRowLengths_cuda MultidiagonalMatrixViewExample_getCompressedRowLengths.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getCompressedRowLengths_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getCompressedRowLengths.out + OUTPUT MultidiagonalMatrixViewExample_getCompressedRowLengths.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getConstRow_cuda MultidiagonalMatrixViewExample_getConstRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getConstRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getConstRow.out + OUTPUT MultidiagonalMatrixViewExample_getConstRow.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getRow_cuda MultidiagonalMatrixViewExample_getRow.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getRow_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getRow.out + OUTPUT MultidiagonalMatrixViewExample_getRow.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_setElement_cuda MultidiagonalMatrixViewExample_setElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_setElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_setElement.out + OUTPUT MultidiagonalMatrixViewExample_setElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_addElement_cuda 
MultidiagonalMatrixViewExample_addElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_addElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_addElement.out + OUTPUT MultidiagonalMatrixViewExample_addElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getElement_cuda MultidiagonalMatrixViewExample_getElement.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getElement_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getElement.out + OUTPUT MultidiagonalMatrixViewExample_getElement.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_rowsReduction_cuda MultidiagonalMatrixViewExample_rowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_rowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_rowsReduction.out + OUTPUT MultidiagonalMatrixViewExample_rowsReduction.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_allRowsReduction_cuda MultidiagonalMatrixViewExample_allRowsReduction.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_allRowsReduction_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_allRowsReduction.out + OUTPUT MultidiagonalMatrixViewExample_allRowsReduction.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forRows_cuda MultidiagonalMatrixViewExample_forRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forRows.out + OUTPUT MultidiagonalMatrixViewExample_forRows.out ) + + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forAllRows_cuda MultidiagonalMatrixViewExample_forAllRows.cu ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forAllRows_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forAllRows.out + OUTPUT 
MultidiagonalMatrixViewExample_forAllRows.out ) + +ELSE() + ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor MultidiagonalMatrixExample_Constructor.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor.out + OUTPUT MultidiagonalMatrixExample_Constructor.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_1 MultidiagonalMatrixExample_Constructor_init_list_1.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_1 > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_1.out + OUTPUT MultidiagonalMatrixExample_Constructor_init_list_1.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_Constructor_init_list_2 MultidiagonalMatrixExample_Constructor_init_list_2.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_Constructor_init_list_2 > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_Constructor_init_list_2.out + OUTPUT MultidiagonalMatrixExample_Constructor_init_list_2.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_getSerializationType MultidiagonalMatrixExample_getSerializationType.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getSerializationType > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getSerializationType.out + OUTPUT MultidiagonalMatrixExample_getSerializationType.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_setElements MultidiagonalMatrixExample_setElements.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElements > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElements.out + OUTPUT MultidiagonalMatrixExample_setElements.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_getCompressedRowLengths MultidiagonalMatrixExample_getCompressedRowLengths.cpp ) + ADD_CUSTOM_COMMAND( COMMAND 
MultidiagonalMatrixExample_getCompressedRowLengths > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getCompressedRowLengths.out + OUTPUT MultidiagonalMatrixExample_getCompressedRowLengths.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_getConstRow MultidiagonalMatrixExample_getConstRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getConstRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getConstRow.out + OUTPUT MultidiagonalMatrixExample_getConstRow.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_getRow MultidiagonalMatrixExample_getRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getRow.out + OUTPUT MultidiagonalMatrixExample_getRow.out ) + +# This example does not work with nvcc 10.1. Restore it here when it works. +# ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement MultidiagonalMatrixExample_setElement.cpp ) +# ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement > +# ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out +# OUTPUT MultidiagonalMatrixExample_setElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_addElement MultidiagonalMatrixExample_addElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_addElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_addElement.out + OUTPUT MultidiagonalMatrixExample_addElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_getElement MultidiagonalMatrixExample_getElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_getElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_getElement.out + OUTPUT MultidiagonalMatrixExample_getElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_rowsReduction MultidiagonalMatrixExample_rowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND 
MultidiagonalMatrixExample_rowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_rowsReduction.out + OUTPUT MultidiagonalMatrixExample_rowsReduction.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_allRowsReduction MultidiagonalMatrixExample_allRowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_allRowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_allRowsReduction.out + OUTPUT MultidiagonalMatrixExample_allRowsReduction.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_forRows MultidiagonalMatrixExample_forRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forRows.out + OUTPUT MultidiagonalMatrixExample_forRows.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixExample_forAllRows MultidiagonalMatrixExample_forAllRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_forAllRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_forAllRows.out + OUTPUT MultidiagonalMatrixExample_forAllRows.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getCompressedRowLengths MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getCompressedRowLengths > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getCompressedRowLengths.out + OUTPUT MultidiagonalMatrixViewExample_getCompressedRowLengths.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getConstRow MultidiagonalMatrixViewExample_getConstRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getConstRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getConstRow.out + OUTPUT MultidiagonalMatrixViewExample_getConstRow.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getRow MultidiagonalMatrixViewExample_getRow.cpp ) + ADD_CUSTOM_COMMAND( COMMAND 
MultidiagonalMatrixViewExample_getRow > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getRow.out + OUTPUT MultidiagonalMatrixViewExample_getRow.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_setElement MultidiagonalMatrixViewExample_setElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_setElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_setElement.out + OUTPUT MultidiagonalMatrixViewExample_setElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_addElement MultidiagonalMatrixViewExample_addElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_addElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_addElement.out + OUTPUT MultidiagonalMatrixViewExample_addElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_getElement MultidiagonalMatrixViewExample_getElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_getElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_getElement.out + OUTPUT MultidiagonalMatrixViewExample_getElement.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_rowsReduction MultidiagonalMatrixViewExample_rowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_rowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_rowsReduction.out + OUTPUT MultidiagonalMatrixViewExample_rowsReduction.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_allRowsReduction MultidiagonalMatrixViewExample_allRowsReduction.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_allRowsReduction > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_allRowsReduction.out + OUTPUT MultidiagonalMatrixViewExample_allRowsReduction.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forRows MultidiagonalMatrixViewExample_forRows.cpp ) + 
ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forRows.out + OUTPUT MultidiagonalMatrixViewExample_forRows.out ) + + ADD_EXECUTABLE( MultidiagonalMatrixViewExample_forAllRows MultidiagonalMatrixViewExample_forAllRows.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixViewExample_forAllRows > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixViewExample_forAllRows.out + OUTPUT MultidiagonalMatrixViewExample_forAllRows.out ) + +ENDIF() + + ADD_EXECUTABLE( MultidiagonalMatrixExample_setElement MultidiagonalMatrixExample_setElement.cpp ) + ADD_CUSTOM_COMMAND( COMMAND MultidiagonalMatrixExample_setElement > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MultidiagonalMatrixExample_setElement.out + OUTPUT MultidiagonalMatrixExample_setElement.out ) + + +ADD_CUSTOM_TARGET( RunMultidiagonalMatricesExamples ALL DEPENDS + MultidiagonalMatrixExample_Constructor.out + MultidiagonalMatrixExample_Constructor_init_list_1.out + MultidiagonalMatrixExample_Constructor_init_list_2.out + MultidiagonalMatrixExample_getSerializationType.out + MultidiagonalMatrixExample_setElements.out + MultidiagonalMatrixExample_getCompressedRowLengths.out + MultidiagonalMatrixExample_getConstRow.out + MultidiagonalMatrixExample_getRow.out + MultidiagonalMatrixExample_setElement.out + MultidiagonalMatrixExample_addElement.out + MultidiagonalMatrixExample_getElement.out + MultidiagonalMatrixExample_rowsReduction.out + MultidiagonalMatrixExample_allRowsReduction.out + MultidiagonalMatrixExample_forRows.out + MultidiagonalMatrixExample_forAllRows.out + MultidiagonalMatrixViewExample_getCompressedRowLengths.out + MultidiagonalMatrixViewExample_getConstRow.out + MultidiagonalMatrixViewExample_getRow.out + MultidiagonalMatrixViewExample_setElement.out + MultidiagonalMatrixViewExample_addElement.out + MultidiagonalMatrixViewExample_getElement.out + MultidiagonalMatrixViewExample_rowsReduction.out + 
MultidiagonalMatrixViewExample_allRowsReduction.out + MultidiagonalMatrixViewExample_forRows.out + MultidiagonalMatrixViewExample_forAllRows.out +) + diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8f8b8139bc172321d2037d931b51652c506581f2 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + /*** + * Set matrix representing approximation of the Laplace operator on regular + * grid using the finite difference method. + */ + const int gridSize( 4 ); + const int matrixSize = gridSize * gridSize; + TNL::Containers::Vector< int, Device > shifts { - gridSize, -1, 0, 1, gridSize }; + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( matrixSize, matrixSize, shifts ); + auto matrixView = matrix.getView(); + auto f = [=] __cuda_callable__ ( int i, int j ) mutable { + const int elementIdx = j * gridSize + i; + auto row = matrixView.getRow( elementIdx ); + if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) + row.setElement( 2, 1.0 ); // set matrix elements corresponding to boundary grid nodes + // and Dirichlet boundary conditions, i.e. 1 on the main diagonal + // which is the third one + else + { + row.setElement( 0, -1.0 ); // set matrix elements corresponding to inner grid nodes, i.e. 
+ row.setElement( 1, -1.0 ); // 4 on the main diagonal (the third one) and -1 to the other + row.setElement( 2, 4.0 ); // sub-diagonals + row.setElement( 3, -1.0 ); + row.setElement( 4, -1.0 ); + } + }; + TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, gridSize, gridSize, f ); + + std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu new file mode 120000 index 0000000000000000000000000000000000000000..7d790b5c9e1a87957e544064912a2d1d3864499a --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_Constructor.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1056ab9c6ae6a2f9b407298b97a9d91e35296e36 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + /*** + * Set matrix representing approximation of the Laplace operator on regular + * grid using the finite difference method. 
+ */ + const int gridSize( 4 ); + const int matrixSize = gridSize * gridSize; + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( matrixSize, matrixSize, { - gridSize, -1, 0, 1, gridSize } ); + auto matrixView = matrix.getView(); + auto f = [=] __cuda_callable__ ( int i, int j ) mutable { + const int elementIdx = i * gridSize + j; + auto row = matrixView.getRow( elementIdx ); + if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) + row.setElement( 2, 1.0 ); // set matrix elements corresponding to boundary grid nodes + // and Dirichlet boundary conditions, i.e. 1 on the main diagonal + // which is the third one + else + { + row.setElement( 0, -1.0 ); // set matrix elements corresponding to inner grid nodes, i.e. + row.setElement( 1, -1.0 ); // 4 on the main diagonal (the third one) and -1 to the other + row.setElement( 2, 4.0 ); // sub-diagonals + row.setElement( 3, -1.0 ); + row.setElement( 4, -1.0 ); + } + }; + TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, gridSize, gridSize, f ); + + std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... 
" << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu new file mode 120000 index 0000000000000000000000000000000000000000..6b3354ef624c1643a1893da638a7122335153d71 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_Constructor_init_list_1.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..60bcde7fc84d5c11bf6483729a0d8fbf33114599 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void createMultidiagonalMatrix() +{ + const int matrixSize = 6; + + /*** + * Setup the following matrix (dots represent zeros): + * + * / 4 -1 . -1 . . \ + * | -1 4 -1 . -1 . | + * | . -1 4 -1 . -1 | + * | -1 . -1 4 -1 . | + * | . -1 . -1 4 -1 | + * \ . . -1 . -1 4 / + * + * The diagonals offsets are { -3, -1, 0, 1, 3 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, { -3, -1, 0, 1, 3 }, { + /*** + * To set the matrix elements we first extend the diagonals to their full + * lengths even outside the matrix (dots represent zeros and zeros are + * artificial zeros used for memory alignment): + * + * 0 . 0 / 4 -1 . -1 . . \ -> { 0, 0, 4, -1, -1 } + * . 0 . | -1 4 -1 . -1 . | . -> { 0, -1, 4, -1, -1 } + * . . 0 | . -1 4 -1 . -1 | . . 
-> { 0, -1, 4, -1, -1 } + * . . | -1 . -1 4 -1 . | 0 . . -> { -1, -1, 4, -1, 0 } + * . | . -1 . -1 4 -1 | . 0 . . -> { -1, -1, 4, -1, 0 } + * \ . . -1 . -1 4 / 0 . 0 . . -> { -1, -1, 4, 0, 0 } + * + */ + { 0, 0, 4, -1, -1 }, + { 0, -1, 4, -1, -1 }, + { 0, -1, 4, -1, -1 }, + { -1, -1, 4, -1, 0 }, + { -1, -1, 4, -1, 0 }, + { -1, -1, 4, 0, 0 } + } ); + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Create multidiagonal matrix on CPU ... " << std::endl; + createMultidiagonalMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating multidiagonal matrix on CUDA GPU ... " << std::endl; + createMultidiagonalMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu new file mode 120000 index 0000000000000000000000000000000000000000..9098df5e3780bd23c761220d2c82770d44a5bcc6 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_Constructor_init_list_2.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a2da4e40cda5cba3d1f1084a0eee869e780d4c24 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp @@ -0,0 +1,39 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize, // number of 
columns + { -1, 0, 1 } ); // diagonals offsets + for( int i = 0; i < matrixSize; i++ ) + matrix.setElement( i, i, i ); + + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < matrixSize; i++ ) + { + if( i > 0 ) + matrix.addElement( i, i - 1, 1.0, 5.0 ); + matrix.addElement( i, i, 1.0, 5.0 ); + if( i < matrixSize - 1 ) + matrix.addElement( i, i + 1, 1.0, 5.0 ); + } + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..04b6a5875bc8ee99d2a599ccde619ea4797a9ee0 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b102c73b680ca9f9c0c63f315b81c91fb24dbbf --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void allRowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . 
\ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { -2, -1, 0 }, // diagonals offsets + { { 0, 0, 1 }, // matrix elements + { 0, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 } } ); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + allRowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + allRowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..c8659a5f4bc549e90cc8e84a26f41eb5a1d74a2e --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0114acf63d946545d4a05f9015de26c1da65ff2c --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include + +template< typename Device > +void forAllRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. 
+ */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5, // number of matrix columns + { -2, -1, 0 } ); // matrix diagonals offsets + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + */ + value = 3 - localIdx; + }; + matrix.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forAllRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forAllRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..b18e48f2bbac3fd52a1c814f0b90728cc72f1aa1 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..07382c2e3c809fd0a3d583564a656fc812e8e0f6 --- /dev/null +++ 
b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5, // number of matrix columns + { -2, -1, 0 } ); // matrix diagonals offsets + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 
3 2 1 / -> { 3, 2, 1 } + * + */ + value = 3 - localIdx; + }; + matrix.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..aff0dad0c51477bd34514e1e58420365f75faea5 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5617b514d94259a88e7e9c5f0e4b6c7bd2c9e7da --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + const int gridSize( 4 ); + const int matrixSize = gridSize * gridSize; + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize, // number of columns + { - gridSize, -1, 0, 1, gridSize } // diagonals offsets + ); + matrix.setElements( { + { 0.0, 0.0, 1.0 }, // set matrix elements corresponding to boundary grid nodes + { 0.0, 0.0, 1.0 }, // and Dirichlet boundary conditions, i.e. 
1 on the main diagonal + { 0.0, 0.0, 1.0 }, // which is the third one + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, // set matrix elements corresponding to inner grid nodes, i.e. 4 on the main diagonal + { -1.0, -1.0, 4.0, -1.0, -1.0 }, // (the third one) and -1 to the other sub-diagonals + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 } + } ); + TNL::Containers::Vector< int, Device > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl; + std::cout << "Compressed row lengths: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... 
" << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..b711bdfdfd899ef94de502fe73c659b6ff72caff --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1cbda6be7f792de05607eecafebe938f93868ec8 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize = 5; + auto diagonalsOffsets = { -2, -1, 0 }; + using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< MatrixType > matrix ( + matrixSize, // number of matrix rows + matrixSize, // number of matrix columns + diagonalsOffsets ); + matrix->setElements( + { { 0.0, 0.0, 1.0 }, + { 0.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 } } ); + + /*** + * Fetch lambda function returns diagonal element in each row. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double { + auto row = matrix->getRow( rowIdx ); + return row.getValue( 2 ); // get value from subdiagonal with index 2, i.e. 
the main diagonal + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use MultidiagonalMatrixView. See + * MultidiagonalMatrixView::getConstRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + + /*** + * Compute the matrix trace. + */ + int trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, fetch, 0 ); + std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; + std::cout << "Matrix trace is: " << trace << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..ec42cc67428b8927ad0212919924e31df4a3b044 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1d7486fb5c249cdceefbff082b256ff0341c84f --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::MultidiagonalMatrix< 
double, Device > matrix ( + matrixSize, // number of matrix columns + { -1, 0, 1 }, // matrix diagonals offsets + { // matrix elements definition + { 0.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, 0.0 } + } ); + + + for( int i = 0; i < matrixSize; i++ ) + { + for( int j = 0; j < matrixSize; j++ ) + std::cout << std::setw( 5 ) << matrix.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..6eb5f59c20cd825a25a8ae0b96918755e5f77606 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..856848bd3acb989e5cbcd8c3e66baf0f1f934009 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize( 5 ); + auto diagonalsOffsets = { -1, 0, 1 }; // Variadic templates in SharedPointer + // constructor do not recognize initializer + // list so we give it a hint. 
+ using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< MatrixType > matrix( + matrixSize, // number of matrix rows + matrixSize, // number of matrix columns + diagonalsOffsets ); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + //auto row = matrix->getRow( rowIdx ); + // For some reason the previous line of code is not accepted by nvcc 10.1 + // so we replace it with the following two lines. + auto ref = matrix.modifyData(); + auto row = ref.getRow( rowIdx ); + + if( rowIdx > 0 ) + row.setElement( 0, -1.0 ); // elements below the diagonal + row.setElement( 1, 2.0 ); // elements on the diagonal + if( rowIdx < matrixSize - 1 ) // elements above the diagonal + row.setElement( 2, -1.0 ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use MultidiagonalMatrixView. See + * MultidiagonalMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + + /*** + * Set the matrix elements. + */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f ); + std::cout << std::endl << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + // It seems that nvcc 10.1 does not handle lambda functions properly. + // It is hard to make nvcc to compile this example and it does not work + // properly. We will try it with later version of CUDA. 
+ //std::cout << "Getting matrix rows on CUDA device: " << std::endl; + //getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..6eca2f44497da66297dec924982041403e63eb58 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a72e90dce2cc97a278f6c91e7c03cf5a5dd1b851 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + + +template< typename Device > +void getSerializationTypeExample() +{ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix; + + std::cout << "Matrix type is: " << matrix.getSerializationType(); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get serialization type on CPU ... " << std::endl; + getSerializationTypeExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get serialization type on CUDA GPU ... 
" << std::endl; + getSerializationTypeExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu new file mode 120000 index 0000000000000000000000000000000000000000..322cb05b89cce1472fdae1a6b93d0043300e30d7 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_getSerializationType.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dc3d4048384c48e85d952d2f35a10ad55a40d491 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { -2, -1, 0 }, // diagonals offsets + { { 0, 0, 1 }, // matrix elements + { 0, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 } } ); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. 
+ */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..adaff28e8a549eeb8b5539535f78e5cc3594f698 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..bf28ec22661be19d99fdfb31b48a2f2e44f46285 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + const int matrixSize( 5 ); + auto diagonalsOffsets = { -1, 0, 1 }; // offsets of tridiagonal matrix + using Matrix = TNL::Matrices::MultidiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< Matrix > matrix( matrixSize, matrixSize, diagonalsOffsets ); + for( int i = 0; i < matrixSize; i++ ) // fill the main diagonal from the host + matrix->setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << *matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { // sets the (up to) three entries of row i + if( i > 0 ) + matrix->setElement( i, i - 1, 1.0 ); + matrix->setElement( i, i, -i ); + if( i < matrixSize - 1 ) + matrix->setElement( i, i + 1, 1.0 ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use MultidiagonalMatrixView. See + * MultidiagonalMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Set elements on host:" << std::endl; + setElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + // It seems that nvcc 10.1 does not handle lambda functions properly. + // It is hard to make nvcc to compile this example and it does not work + // properly. We will try it with later version of CUDA. 
+ //std::cout << "Set elements on CUDA device:" << std::endl; + //setElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..88a0fa864e713cd8d5c1c27c20f16aa108e2184a --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_setElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b43f2e341d53202a664fdb8452e5f1c91cd60b18 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void createMultidiagonalMatrix() +{ + const int matrixSize = 6; + + /*** + * Setup the following matrix (dots represent zeros): + * + * / 4 -1 . -1 . . \ + * | -1 4 -1 . -1 . | + * | . -1 4 -1 . -1 | + * | -1 . -1 4 -1 . | + * | . -1 . -1 4 -1 | + * \ . . -1 . -1 4 / + * + * The diagonals offsets are { -3, -1, 0, 1, 3 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, // number of matrix rows + matrixSize, // number of matrix columns + { -3, -1, 0, 1, 3 } ); // matrix diagonals offsets + matrix.setElements( { + /*** + * To set the matrix elements we first extend the diagonals to their full + * lengths even outside the matrix (dots represent zeros and zeros are + * artificial zeros used for memory alignment): + * + * 0 . 0 / 4 -1 . -1 . . \ -> { 0, 0, 4, -1, -1 } + * . 0 . | -1 4 -1 . -1 . | . 
-> { 0, -1, 4, -1, -1 } + * . . 0 | . -1 4 -1 . -1 | . . -> { 0, -1, 4, -1, -1 } + * . . | -1 . -1 4 -1 . | 0 . . -> { -1, -1, 4, -1, 0 } + * . | . -1 . -1 4 -1 | . 0 . . -> { -1, -1, 4, -1, 0 } + * \ . . -1 . -1 4 / 0 . 0 . . -> { -1, -1, 4, 0, 0 } + * + */ + { 0, 0, 4, -1, -1 }, + { 0, -1, 4, -1, -1 }, + { 0, -1, 4, -1, -1 }, + { -1, -1, 4, -1, 0 }, + { -1, -1, 4, -1, 0 }, + { -1, -1, 4, 0, 0 } + } ); + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Create multidiagonal matrix on CPU ... " << std::endl; + createMultidiagonalMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating multidiagonal matrix on CUDA GPU ... " << std::endl; + createMultidiagonalMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu new file mode 120000 index 0000000000000000000000000000000000000000..b5a31ea141f6fcba723e453926d54692852a7a6e --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cu @@ -0,0 +1 @@ +MultidiagonalMatrixExample_setElements.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46f92d7a3a60f7f277e34cb36961f31f4b52ec3f --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp @@ -0,0 +1,40 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + 
matrixSize, // number of columns + { -1, 0, 1 } ); // diagonals offsets + auto view = matrix.getView(); + for( int i = 0; i < matrixSize; i++ ) + view.setElement( i, i, i ); + + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < matrixSize; i++ ) + { + if( i > 0 ) + view.addElement( i, i - 1, 1.0, 5.0 ); + view.addElement( i, i, 1.0, 5.0 ); + if( i < matrixSize - 1 ) + view.addElement( i, i + 1, 1.0, 5.0 ); + } + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..7d6a43822f99a19983647d1ef40eb46b0f6b3234 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..26dac464cea68636fb61458ab2e86f3ca153ed56 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void allRowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros 
are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { -2, -1, 0 }, // diagonals offsets + { { 0, 0, 1 }, // matrix elements + { 0, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 } } ); + auto view = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + allRowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + allRowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..5e14692de8159377e123ed8fec43cd750143b986 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..143aa864c88a16c2100027b1e32524456f67c991 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include + +template< typename Device > +void forAllRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. 
+ */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5, // number of matrix columns + { -2, -1, 0 } ); // matrix diagonals offsets + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forAllRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + */ + value = 3 - localIdx; + }; + view.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forAllRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forAllRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..2138ba26b417da638d42e088312ed929aa50ff94 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..23aa067531f46d1581c835ffb97f63cfa2ad65ca --- 
/dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5, // number of matrix columns + { -2, -1, 0 } ); // matrix diagonals offsets + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 
3 2 1 / -> { 3, 2, 1 } + * + */ + value = 3 - localIdx; + }; + view.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..ec3f1ad70dabbf5aaf7ac170a72b10868d18df6e --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b366ab3dc6cb84e377150dd0066eb86fc08cb85 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + const int gridSize( 4 ); + const int matrixSize = gridSize * gridSize; + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize, // number of columns + { - gridSize, -1, 0, 1, gridSize } // diagonals offsets + ); + matrix.setElements( { + { 0.0, 0.0, 1.0 }, // set matrix elements corresponding to boundary grid nodes + { 0.0, 0.0, 1.0 }, // and Dirichlet boundary conditions, i.e. 
1 on the main diagonal + { 0.0, 0.0, 1.0 }, // which is the third one + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, // set matrix elements corresponding to inner grid nodes, i.e. 4 on the main diagonal + { -1.0, -1.0, 4.0, -1.0, -1.0 }, // (the third one) and -1 to the other sub-diagonals + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 } + } ); + auto view = matrix.getView(); + TNL::Containers::Vector< int, Device > rowLengths; + view.getCompressedRowLengths( rowLengths ); + std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl; + std::cout << "Compressed row lengths: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... 
" << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..77d01be55991c4e8e5a7668698cbda6aba1ef0e6 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..748c9566590058b95881eacee68c4656c8890d7f --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize = 5; + auto diagonalsOffsets = { -2, -1, 0 }; + using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; + MatrixType matrix ( + matrixSize, // number of matrix columns + diagonalsOffsets, + { { 0.0, 0.0, 1.0 }, // matrix elements + { 0.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 } } ); + auto view = matrix.getView(); + + /*** + * Fetch lambda function returns diagonal element in each row. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double { + auto row = view.getRow( rowIdx ); + return row.getValue( 2 ); // get value from subdiagonal with index 2, i.e. the main diagonal + }; + + /*** + * Compute the matrix trace. 
+ */ + int trace = TNL::Algorithms::Reduction< Device >::reduce( matrix.getRows(), std::plus<>{}, fetch, 0 ); + std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Matrix trace is: " << trace << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..bb31b7a5dd4c7109ff7d27ed0322ad04526b0b27 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bf12266623a56d9859856bad1aa2342637ccd4f9 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix ( + matrixSize, // number of matrix columns + { -1, 0, 1 }, // matrix diagonals offsets + { // matrix elements definition + { 0.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, 0.0 } + } ); + auto view = matrix.getView(); + + for( int i = 0; i < 
matrixSize; i++ ) + { + for( int j = 0; j < matrixSize; j++ ) + std::cout << std::setw( 5 ) << view.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..a246e2dd9284769702131e24ffd4ee2d4d8eb507 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ac322f9aa61ccece6b0f5da4e6d911b29d0d48b7 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp @@ -0,0 +1,46 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize( 5 ); + auto diagonalsOffsets = { -1, 0, 1 }; // offsets of tridiagonal matrix: + // below the diagonal (-1), the diagonal (0) + // and above the diagonal (1). 
+ using MatrixType = TNL::Matrices::MultidiagonalMatrix< double, Device >; + MatrixType matrix( + matrixSize, // number of matrix rows + matrixSize, // number of matrix columns + diagonalsOffsets ); + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + auto row = view.getRow( rowIdx ); + if( rowIdx > 0 ) + row.setElement( 0, -1.0 ); // elements below the diagonal + row.setElement( 1, 2.0 ); // elements on the diagonal + if( rowIdx < matrixSize - 1 ) // elements above the diagonal + row.setElement( 2, -1.0 ); + }; + + /*** + * Set the matrix elements. + */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); + std::cout << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..2ac03ebc3ff65d20df1530e52bf960584299ed3d --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6793d8bb29969118bd8bcce954d09364db730618 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp @@ -0,0 +1,81 @@ +#include 
+#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 0 / 1 . . . . \ -> { 0, 0, 1 } + * 0 | 2 1 . . . | -> { 0, 2, 1 } + * | 3 2 1 . . | -> { 3, 2, 1 } + * | . 3 2 1 . | -> { 3, 2, 1 } + * \ . . 3 2 1 / -> { 3, 2, 1 } + * + * The diagonals offsets are { -2, -1, 0 }. + */ + TNL::Matrices::MultidiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { -2, -1, 0 }, // diagonals offsets + { { 0, 0, 1 }, // matrix elements + { 0, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 }, + { 3, 2, 1 } } ); + auto view = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + view.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..44df3d3ed38482b4495dcf4aa49f24fc09955650 --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cu @@ -0,0 +1 @@ +MultidiagonalMatrixViewExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41f93667009eece80c94dc5bbe1433c2dd460aeb --- /dev/null +++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + const int matrixSize( 5 ); + auto diagonalsOffsets = { -1, 0, 1 }; // offsets of tridiagonal matrix + using Matrix = TNL::Matrices::MultidiagonalMatrix< double, Device >; + Matrix matrix( matrixSize, matrixSize, diagonalsOffsets ); + auto view = matrix.getView(); + + for( int i = 0; i < matrixSize; i++ ) // fill the main diagonal from the host + view.setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { // sets the (up to) three entries of row i + if( i > 0 ) + view.setElement( i, i - 1, 1.0 ); + view.setElement( i, i, -i ); 
+      if( i < matrixSize - 1 )
+         view.setElement( i, i + 1, 1.0 );
+   };
+
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f );
+
+   std::cout << "Matrix set from its native device:" << std::endl;
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Set elements on host:" << std::endl;
+   setElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Set elements on CUDA device:" << std::endl;
+   setElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..05c1930583f1a7a53091b15f8aeb14b9685b3db7
--- /dev/null
+++ b/Documentation/Examples/Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cu
@@ -0,0 +1 @@
+MultidiagonalMatrixViewExample_setElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f0410315d45d92555668125a50a258d07df97d1
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/CMakeLists.txt
@@ -0,0 +1,52 @@
+# Sparse matrix documentation examples. Every <name> below is expected to have a
+# <name>.cpp source and a <name>.cu symlink to it; running the built program
+# writes <name>.out into the documentation snippets directory.
+SET( SPARSE_MATRIX_EXAMPLES
+   SparseMatrixExample_Constructor_init_list_1
+   SparseMatrixExample_Constructor_init_list_2
+   SparseMatrixExample_Constructor_std_map
+   SparseMatrixExample_getSerializationType
+   SparseMatrixExample_setRowCapacities
+   SparseMatrixExample_setElements
+   SparseMatrixExample_setElements_map
+   SparseMatrixExample_getCompressedRowLengths
+   SparseMatrixExample_getConstRow
+   SparseMatrixExample_getRow
+   SparseMatrixExample_setElement
+   SparseMatrixExample_addElement
+   SparseMatrixExample_getElement
+   SparseMatrixExample_rowsReduction
+   SparseMatrixExample_allRowsReduction
+   SparseMatrixExample_forRows
+   SparseMatrixExample_forAllRows
+   SparseMatrixViewExample_getSerializationType
+   SparseMatrixViewExample_getCompressedRowLengths
+   SparseMatrixViewExample_getConstRow
+   SparseMatrixViewExample_getRow
+   SparseMatrixViewExample_setElement
+   SparseMatrixViewExample_addElement
+   SparseMatrixViewExample_getElement
+   SparseMatrixViewExample_rowsReduction
+   SparseMatrixViewExample_allRowsReduction
+   SparseMatrixViewExample_forRows
+   SparseMatrixViewExample_forAllRows
+)
+
+# Build every example once (from the .cu file when BUILD_CUDA is set, from the .cpp
+# file otherwise) and register the command that captures its standard output.
+SET( SPARSE_MATRIX_EXAMPLE_OUTPUTS )
+FOREACH( EXAMPLE IN LISTS SPARSE_MATRIX_EXAMPLES )
+   IF( BUILD_CUDA )
+      SET( EXAMPLE_TARGET ${EXAMPLE}_cuda )
+      CUDA_ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cu )
+   ELSE()
+      SET( EXAMPLE_TARGET ${EXAMPLE} )
+      ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cpp )
+   ENDIF()
+   ADD_CUSTOM_COMMAND( COMMAND ${EXAMPLE_TARGET} >
+                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${EXAMPLE}.out
+                       OUTPUT ${EXAMPLE}.out )
+   LIST( APPEND SPARSE_MATRIX_EXAMPLE_OUTPUTS ${EXAMPLE}.out )
+ENDFOREACH()
+
+ADD_CUSTOM_TARGET( RunSparseMatricesExamples ALL DEPENDS ${SPARSE_MATRIX_EXAMPLE_OUTPUTS} )
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92524b173b8e3d1b03b6280ca101b6de415fe427
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp
@@ -0,0 +1,28 @@
+#include
+#include
+#include
+
+
+template< typename Device >
+void initializerListExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix {
+      { 1, 2, 3, 4, 5 }, // row capacities
+      6 };               // number of matrix columns
+
+   for( int row = 0; row < matrix.getRows(); row++ )
+      for( int column = 0; column <= row; column++ )
+         matrix.setElement( row, column, row - column + 1 );
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu
new file mode 120000
index 0000000000000000000000000000000000000000..4afbecd7fbbc3cf0cb99df2cba261f6feb557037
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_init_list_1.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e14f0618887d2e64fee590ec2e1390fa3c39e77b
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp
@@ -0,0 +1,31 @@
+#include
+#include
+#include
+
+
+template< typename Device >
+void initializerListExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix (
+      5, // number of matrix rows
+      5, // number of matrix columns
+      {  // matrix elements definition
+         {  0,  0,  2.0 },
+         {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+         {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+         {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+         {  4,  4,  2.0 } } );
+
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu
new file mode 120000
index 0000000000000000000000000000000000000000..112624ab390c7deac6e6e84eec346f7931d0a583
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_init_list_2.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8f64d0e9b5363e0845f9e392cd23b9c2057c5363
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp
@@ -0,0 +1,38 @@
+#include
+#include
+#include
+#include
+#include
+
+
+template< typename Device >
+void initializerListExample()
+{
+   std::map< std::pair< int, int >, double > map;
+   map.insert( std::make_pair( std::make_pair( 0, 0 ), 2.0 ) );
+   map.insert( std::make_pair( std::make_pair( 1, 0 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 1, 1 ), 2.0 ) );
+   map.insert( std::make_pair( std::make_pair( 1, 2 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 2, 1 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 2, 2 ), 2.0 ) );
+   map.insert( std::make_pair( std::make_pair( 2, 3 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 3, 2 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 3, 3 ), 2.0 ) );
+   map.insert( std::make_pair( std::make_pair( 3, 4 ), -1.0 ) );
+   map.insert( std::make_pair( std::make_pair( 4, 4 ), 2.0 ) );
+
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, map );
+
+   std::cout << "General sparse matrix: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrices on CPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrices on CUDA GPU ... " << std::endl;
+   initializerListExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu
new file mode 120000
index 0000000000000000000000000000000000000000..3f08e48c77bb1fdeb5eb54a2e8c61748db90624c
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cu
@@ -0,0 +1 @@
+SparseMatrixExample_Constructor_std_map.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1b51f494f244ccf3e2054c25b4b77da12c9806ed
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp
@@ -0,0 +1,30 @@
+#include
+#include
+#include
+
+template< typename Device >
+void addElements()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 5, 5, 5, 5, 5 }, 5 );
+   for( int i = 0; i < 5; i++ )
+      matrix.setElement( i, i, i );
+
+   std::cout << "Initial matrix is: " << std::endl << matrix << std::endl;
+
+   for( int i = 0; i < 5; i++ )
+      for( int j = 0; j < 5; j++ )
+         matrix.addElement( i, j, 1.0, 5.0 );
+
+   std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Add elements on host:" << std::endl;
+   addElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Add elements on CUDA device:" << std::endl;
+   addElements< TNL::Devices::Cuda >();
+#endif
+}
diff
--git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..2ec36ad7459e3947e785ac44b3a0718f37d86c2d
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_addElement.cu
@@ -0,0 +1 @@
+SparseMatrixExample_addElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ce31f09e42936f4c1a754b7cc94978a1e8ab1d4
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp
@@ -0,0 +1,67 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void allRowsReduction()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, {
+      { 0, 0, 1 },
+      { 1, 1, 1 }, { 1, 2, 8 },
+      { 2, 2, 1 }, { 2, 3, 9 },
+      { 3, 3, 1 }, { 3, 4, 9 },
+      { 4, 4, 1 } } );
+
+   /***
+    * Find the largest element (in absolute value) in each row.
+    */
+   TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() );
+
+   /***
+    * Prepare a view of the result vector so the lambdas can capture it by value.
+    */
+   auto rowMaxView = rowMax.getView();
+
+   /***
+    * Fetch lambda returns the absolute value of each matrix element.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double {
+      return TNL::abs( value );
+   };
+
+   /***
+    * Reduce lambda returns the maximum of the two given values; it only reads its arguments.
+    */
+   auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double {
+      return TNL::max( a, b );
+   };
+
+   /***
+    * Keep lambda stores the largest value of each row in the vector rowMax.
+    */
+   auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable {
+      rowMaxView[ rowIdx ] = value;
+   };
+
+   /***
+    * Compute the largest values in each row.
+    */
+   matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() );
+
+   std::cout << "The matrix reads as: " << std::endl << matrix << std::endl;
+   std::cout << "Max. elements in rows are: " << rowMax << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "All rows reduction on host:" << std::endl;
+   allRowsReduction< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "All rows reduction on CUDA device:" << std::endl;
+   allRowsReduction< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
new file mode 120000
index 0000000000000000000000000000000000000000..f087b816fa0cd3b657956475bb9c4c0f3f9769dc
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cu
@@ -0,0 +1 @@
+SparseMatrixExample_allRowsReduction.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..739600539260bba9e11c703c83b2d56ed8a75ff7
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp
@@ -0,0 +1,35 @@
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void forAllRowsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )  // Row rowIdx was given capacity rowIdx + 1. Some matrix formats may allocate more
+                               // elements than we requested; these padding elements are visited here as well.
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;
+         value = rowIdx + localIdx;
+      }
+   };
+
+   matrix.forAllRows( f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forAllRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forAllRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..51cc7bd49f1754bd7d04c05c448d852572599cef
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forAllRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2330c2ca5d94439726dc4df53ef9977116d43de0
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp
@@ -0,0 +1,35 @@
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void forRowsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 );
+
+   auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) {
+      if( rowIdx < localIdx )  // Row rowIdx was given capacity rowIdx + 1. Some matrix formats may allocate more
+                               // elements than we requested; these padding elements are visited here as well.
+         compute = false;
+      else
+      {
+         columnIdx = localIdx;
+         value = rowIdx + localIdx;
+      }
+   };
+
+   matrix.forRows( 0, matrix.getRows(), f );
+   std::cout << matrix << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix on host: " << std::endl;
+   forRowsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix on CUDA device: " << std::endl;
+   forRowsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
new file mode 120000
index 0000000000000000000000000000000000000000..87c20fbe0e9e4ca72cd80150073726e21813b0cf
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_forRows.cu
@@ -0,0 +1 @@
+SparseMatrixExample_forRows.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e56abe484cdcc5f14bcb35dfc8f16d53946dd683
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp
@@ -0,0 +1,34 @@
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void getCompressedRowLengthsExample()
+{
+   TNL::Matrices::SparseMatrix< double, Device > triangularMatrix( 5, 5 );
+   triangularMatrix.setElements( {
+      { 0, 0,  1 },
+      { 1, 0,  2 }, { 1, 1,  3 },
+      { 2, 0,  4 }, { 2, 1,  5 }, { 2, 2,  6 },
+      { 3, 0,  7 }, { 3, 1,  8 }, { 3, 2,  9 }, { 3, 3, 10 },
+      { 4, 0, 11 }, { 4, 1, 12 }, { 4, 2, 13 }, { 4, 3, 14 }, { 4, 4, 15 } } );
+
+   std::cout << triangularMatrix << std::endl;
+
+   TNL::Containers::Vector< int, Device > rowLengths;
+   triangularMatrix.getCompressedRowLengths( rowLengths );
+
+   std::cout << "Compressed row lengths are: " << rowLengths << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting compressed row lengths on host: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting compressed row lengths on CUDA device: " << std::endl;
+   getCompressedRowLengthsExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu
new file mode 120000
index 0000000000000000000000000000000000000000..8fc20b77f27feda756430c7cfadc718cf1f81a71
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getCompressedRowLengths.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..747a3c8258bc856034bbe07ba3867ccdd7e8816a
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp
@@ -0,0 +1,52 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void getRowExample()
+{
+   using MatrixType = TNL::Matrices::SparseMatrix< double, Device >;
+   TNL::Pointers::SharedPointer< MatrixType > matrix ( 5, 5 );
+   matrix->setElements(
+      { { 0, 0, 1 },
+        { 1, 0, 1 }, { 1, 1, 2 },
+        { 2, 0, 1 }, { 2, 1, 2 }, { 2, 2, 3 },
+        { 3, 0, 1 }, { 3, 1, 2 }, { 3, 2, 3 }, { 3, 3, 4 },
+        { 4, 0, 1 }, { 4, 1, 2 }, { 4, 2, 3 }, { 4, 3, 4 }, { 4, 4, 5 } } );
+
+   /***
+    * Fetch lambda returns the diagonal element of the given row.
+    */
+   auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double {
+      auto row = matrix->getRow( rowIdx );
+      return row.getValue( rowIdx );
+   };
+
+   /***
+    * For the case when Device is CUDA device we need to synchronize smart
+    * pointers. To avoid this you may use SparseMatrixView. See
+    * SparseMatrixView::getConstRow example for details.
+    */
+   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
+
+   /***
+    * Compute the matrix trace. Accumulate in double (identity 0.0) to match what fetch returns.
+    */
+   double trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, fetch, 0.0 );
+   std::cout << "Matrix trace is " << trace << "." << std::endl;
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Getting matrix rows on host: " << std::endl;
+   getRowExample< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Getting matrix rows on CUDA device: " << std::endl;
+   getRowExample< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu
new file mode 120000
index 0000000000000000000000000000000000000000..03a7d1e7a9b8ec37ac8f35bd297dec564693d8fd
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getConstRow.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4db76597950f21c8897e3864916775fc353fe9e7
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp
@@ -0,0 +1,37 @@
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void getElements()
+{
+   TNL::Matrices::SparseMatrix< double, Device > matrix (
+      5, // number of matrix rows
+      5, // number of matrix columns
+      {  // matrix elements definition
+         {  0,  0,  2.0 },
+         {  1,  0, -1.0 }, {  1,  1,  2.0 }, {  1,  2, -1.0 },
+         {  2,  1, -1.0 }, {  2,  2,  2.0 }, {  2,  3, -1.0 },
+         {  3,  2, -1.0 }, {  3,  3,  2.0 }, {  3,  4, -1.0 },
+         {  4,  4,  2.0 } } );
+
+
+   for( int i = 0; i < 5; i++ )
+   {
+      for( int j = 0; j < 5; j++ )
+         std::cout << std::setw( 5 ) << matrix.getElement( i, j );
+      std::cout << std::endl;
+   }
+}
+
+int main( int argc, char* argv[] )
+{
+   std::cout << "Get elements on host:" << std::endl;
+   getElements< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Get elements on CUDA device:" << std::endl;
+   getElements< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu
new file mode 120000
index 0000000000000000000000000000000000000000..bb3bc66362385752f09020b95a7d7c00d890af76
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getElement.cu
@@ -0,0 +1 @@
+SparseMatrixExample_getElement.cpp
\ No newline at end of file
diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d52602f082953a13d8a6fa54fcc01e52f0a6c749
--- /dev/null
+++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp
@@ -0,0 +1,45 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+template< typename Device >
+void getRowExample()
+{
+   auto rowCapacities = { 1, 1, 1, 1, 1 }; // Variadic templates in SharedPointer
+                                           // constructor do not recognize initializer
+                                           // list so we give it a hint.
+ using MatrixType = TNL::Matrices::SparseMatrix< double, Device >; + TNL::Pointers::SharedPointer< MatrixType > matrix( rowCapacities, 5 ); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + auto row = matrix->getRow( rowIdx ); + row.setElement( 0, rowIdx, 10 * ( rowIdx + 1 ) ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use SparseMatrixView. See + * SparseMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + + /*** + * Set the matrix elements. + */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f ); + std::cout << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..776f8a9d3819ec6fb8392a9373aa8e761996f8f2 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getRow.cu @@ -0,0 +1 @@ +SparseMatrixExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35dbf04063743896ce8d8b2e7e31d8c5721b6129 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + + +template< typename Device > +void getSerializationTypeExample() +{ + 
TNL::Matrices::SparseMatrix< double, Device > matrix; + + std::cout << "Matrix type is: " << matrix.getSerializationType(); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get serialization type on CPU ... " << std::endl; + getSerializationTypeExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get serialization type on CUDA GPU ... " << std::endl; + getSerializationTypeExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu new file mode 120000 index 0000000000000000000000000000000000000000..a042183a0a0203b339c25d9aab429518570f9689 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cu @@ -0,0 +1 @@ +SparseMatrixExample_getSerializationType.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..20279888a38b10ff41dfef49a44cb4a546f19359 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, { + { 0, 0, 1 }, + { 1, 1, 1 }, { 1, 2, 8 }, + { 2, 2, 1 }, { 2, 3, 9 }, + { 3, 3, 1 }, { 3, 4, 9 }, + { 4, 4, 1 } } ); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. 
+ */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..212f162886a3d0bb77aca1b75d7596048409df8b --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cu @@ -0,0 +1 @@ +SparseMatrixExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..178e502dca1fe4b249397173fda21305e3e152e6 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp @@ -0,0 +1,45 @@ +#include 
+#include +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + auto rowCapacities = { 1, 1, 1, 1, 1 }; + TNL::Pointers::SharedPointer< TNL::Matrices::SparseMatrix< double, Device > > matrix( rowCapacities, 5 ); + for( int i = 0; i < 5; i++ ) + matrix->setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << *matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + matrix->setElement( i, i, -i ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use SparseMatrixView. See + * SparseMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Set elements on host:" << std::endl; + setElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Set elements on CUDA device:" << std::endl; + setElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..97b115c3fe344d86091e530bd9b8fe7fd63bced5 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElement.cu @@ -0,0 +1 @@ +SparseMatrixExample_setElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ff0c0bde8aeada867578c43c1e2e2da3fd90ca8f --- /dev/null +++ 
b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include + +template< typename Device > +void setElementsExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5 ); // matrix dimensions + matrix.setElements( { // matrix elements definition + { 0, 0, 2.0 }, + { 1, 0, -1.0 }, { 1, 1, 2.0 }, { 1, 2, -1.0 }, + { 2, 1, -1.0 }, { 2, 2, 2.0 }, { 2, 3, -1.0 }, + { 3, 2, -1.0 }, { 3, 3, 2.0 }, { 3, 4, -1.0 }, + { 4, 4, 2.0 } } ); + + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Setting matrix elements on host: " << std::endl; + setElementsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Setting matrix elements on CUDA device: " << std::endl; + setElementsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu new file mode 120000 index 0000000000000000000000000000000000000000..dfbad6b2724a487f50f94174bc93aee084c26729 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements.cu @@ -0,0 +1 @@ +SparseMatrixExample_setElements.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7bdbbc1738a370fd9120cb461312b6bcc275b6e --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void setElementsExample() +{ + std::map< std::pair< int, int >, double > map; + map.insert( std::make_pair( std::make_pair( 0, 0 ), 2.0 ) ); + map.insert( 
std::make_pair( std::make_pair( 1, 0 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 1, 1 ), 2.0 ) ); + map.insert( std::make_pair( std::make_pair( 1, 2 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 2, 1 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 2, 2 ), 2.0 ) ); + map.insert( std::make_pair( std::make_pair( 2, 3 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 3, 2 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 3, 3 ), 2.0 ) ); + map.insert( std::make_pair( std::make_pair( 3, 4 ), -1.0 ) ); + map.insert( std::make_pair( std::make_pair( 4, 4 ), 2.0 ) ); + + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5 ); + matrix.setElements( map ); + + std::cout << "General sparse matrix: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrices on CPU ... " << std::endl; + setElementsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrices on CUDA GPU ... 
" << std::endl; + setElementsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu new file mode 120000 index 0000000000000000000000000000000000000000..aaf831b5d5e38e29dfeb37589597c69c1469e2e9 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cu @@ -0,0 +1 @@ +SparseMatrixExample_setElements_map.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f282aee6d724c01925d83ee9c9ec79ac3d1a8a66 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void setRowCapacitiesExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( 5, 5 ); + TNL::Containers::Vector< int, Device > rowCapacities{ 1, 2, 3, 4, 5 }; + matrix.setRowCapacities( rowCapacities ); + for( int row = 0; row < 5; row++ ) + for( int column = 0; column <= row; column++ ) + matrix.setElement( row, column, row - column + 1 ); + + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrices on CPU ... " << std::endl; + setRowCapacitiesExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrices on CUDA GPU ... 
" << std::endl; + setRowCapacitiesExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu new file mode 120000 index 0000000000000000000000000000000000000000..77bb1a91f96edc23a4e80b591dc123ef59230077 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cu @@ -0,0 +1 @@ +SparseMatrixExample_setRowCapacities.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9871885195ea40c53ca8b4be637972bb501e1e79 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp @@ -0,0 +1,31 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( { 5, 5, 5, 5, 5 }, 5 ); + auto view = matrix.getView(); + for( int i = 0; i < 5; i++ ) + view.setElement( i, i, i ); + + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < 5; i++ ) + for( int j = 0; j < 5; j++ ) + view.addElement( i, j, 1.0, 5.0 ); + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu new file mode 120000 index 
0000000000000000000000000000000000000000..505aa14b1004cda30a853c9d74616bf06cb75758 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98a03b941697c10c29609febfb5be0a01cd635cb --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void allRowsReduction() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, { + { 0, 0, 1 }, + { 1, 1, 1 }, { 1, 2, 8 }, + { 2, 2, 1 }, { 2, 3, 9 }, + { 3, 3, 1 }, { 3, 4, 9 }, + { 4, 4, 1 } } ); + auto matrixView = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view and matrix view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. 
+ */ + matrixView.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "All rows reduction on host:" << std::endl; + allRowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "All rows reduction on CUDA device:" << std::endl; + allRowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..d63cf05c5786aa73da2585050804d26340c75a9f --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fda71a42ff2cbf17520e36ca1390f311441a0c98 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +template< typename Device > +void forAllRowsExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 ); + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) { + if( rowIdx < columnIdx ) // This is important, some matrix formats may allocate more matrix elements + // than we requested. These padding elements are processed here as well. 
+ compute = false; + else + { + columnIdx = localIdx; + value = rowIdx + localIdx; + } + }; + + view.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forAllRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forAllRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..dd77d11f9d9a75474a5e880d5167ff2a3640ba6b --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..987c3dec4364fac94cf9c25f2dd7c4aa8493f184 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 2, 3, 4, 5 }, 5 ); + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int& columnIdx, double& value, bool& compute ) { + if( rowIdx < columnIdx ) // This is important, some matrix formats may allocate more matrix elements + // than we requested. These padding elements are processed here as well. 
+ compute = false; + else + { + columnIdx = localIdx; + value = rowIdx + localIdx; + } + }; + + view.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..5058dc6cfd7adb63f9d10d2699d6b9b530fd6c90 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0cc12da83447dffca728a0ed26346c208b7122fe --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +template< typename Device > +void getCompressedRowLengthsExample() +{ + TNL::Matrices::SparseMatrix< double, Device > triangularMatrix( 5, 5 ); + triangularMatrix.setElements( { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 0, 4 }, { 2, 1, 5 }, { 2, 2, 6 }, + { 3, 0, 7 }, { 3, 1, 8 }, { 3, 2, 9 }, { 3, 3, 10 }, + { 4, 0, 11 }, { 4, 1, 12 }, { 4, 2, 13 }, { 4, 3, 14 }, { 4, 4, 15 } } ); + + std::cout << triangularMatrix << std::endl; + + auto view = triangularMatrix.getView(); + TNL::Containers::Vector< int, Device > rowLengths; + view.getCompressedRowLengths( rowLengths ); + + 
std::cout << "Compressed row lengths are: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting compressed row lengths on host: " << std::endl; + getCompressedRowLengthsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting compressed row lengths on CUDA device: " << std::endl; + getCompressedRowLengthsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..6fa51b25b0b105fac248451866d35f16b22683da --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c10b6a8040d50d25eff859dfb11b0db5a7ae70d --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, { + { 0, 0, 1 }, + { 1, 0, 1 }, { 1, 1, 2 }, + { 2, 0, 1 }, { 2, 1, 2 }, { 2, 2, 3 }, + { 3, 0, 1 }, { 3, 1, 2 }, { 3, 2, 3 }, { 3, 3, 4 }, + { 4, 0, 1 }, { 4, 1, 2 }, { 4, 2, 3 }, { 4, 3, 4 }, { 4, 4, 5 } } ); + auto matrixView = matrix.getView(); + + /*** + * Fetch lambda function returns diagonal element in each row. 
+ */ + auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double { + auto row = matrixView.getRow( rowIdx ); + return row.getValue( rowIdx ); + }; + + /*** + * Compute the matrix trace. + */ + int trace = TNL::Algorithms::Reduction< Device >::reduce( matrix.getRows(), std::plus<>{}, fetch, 0 ); + std::cout << "Matrix trace is " << trace << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..a1501e37864344438b6e2ceb064ce84f8943d49f --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e14134f3232a8acbb3840ce8b916c9bcbcf08bff --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( + 5, // number of matrix rows + 5, // number of matrix columns + { // matrix elements definition + { 0, 0, 2.0 }, + { 1, 0, -1.0 }, { 1, 1, 2.0 }, { 1, 2, -1.0 }, + { 2, 1, -1.0 }, { 2, 2, 2.0 }, { 2, 3, -1.0 }, + { 3, 2, -1.0 }, { 3, 3, 2.0 }, { 3, 4, -1.0 }, + { 4, 4, 2.0 } } ); + auto view = 
matrix.getView(); + + for( int i = 0; i < 5; i++ ) + { + for( int j = 0; j < 5; j++ ) + std::cout << std::setw( 5 ) << view.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..03d5d32f23fb9439a210fc6aa718212a1fa513a1 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..569fabb6af88457c9820aa8fb483b8f2eeb3fb70 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( { 1, 1, 1, 1, 1 }, 5 ); + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + auto row = view.getRow( rowIdx ); + row.setElement( 0, rowIdx, 10 * ( rowIdx + 1 ) ); + }; + + /*** + * Set the matrix elements. 
+ */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..56b0f7e6275b55114ff304dafa40e2fbe3c80713 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3147bd2919eaea3188366c943b0239d385ac6e63 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp @@ -0,0 +1,24 @@ +#include +#include +#include + + +template< typename Device > +void getSerializationTypeExample() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix; + auto view = matrix.getView(); + + std::cout << "Matrix type is: " << view.getSerializationType(); +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get serialization type on CPU ... " << std::endl; + getSerializationTypeExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get serialization type on CUDA GPU ... 
" << std::endl; + getSerializationTypeExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu new file mode 120000 index 0000000000000000000000000000000000000000..9ddc5c6f29d86ca559c745504a2acbeb9fa5e005 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_getSerializationType.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..69e2ff6fd2cd77d06235e143b44d854da4de414b --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix ( 5, 5, { + { 0, 0, 1 }, + { 1, 1, 1 }, { 1, 2, 8 }, + { 2, 2, 1 }, { 2, 3, 9 }, + { 3, 3, 1 }, { 3, 4, 9 }, + { 4, 4, 1 } } ); + auto matrixView = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. 
+ */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrixView.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..f244c8372ea90bd142f8fae912ae48d32d3be0fa --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cu @@ -0,0 +1 @@ +SparseMatrixViewExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3de6634a3bdd492135e83badc0d1febf02e9b5d3 --- /dev/null +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + TNL::Matrices::SparseMatrix< double, Device > matrix( 
# Examples that are built with CUDA (from the .cu sources, target suffix _cuda)
# when BUILD_CUDA is set, and as plain host executables (from the .cpp sources)
# otherwise. The output snippet of each example is always named <example>.out.
SET( TRIDIAGONAL_MATRIX_EXAMPLES
   TridiagonalMatrixExample_Constructor_init_list_1
   TridiagonalMatrixExample_getSerializationType
   TridiagonalMatrixExample_setElements
   TridiagonalMatrixExample_getCompressedRowLengths
   TridiagonalMatrixExample_getConstRow
   TridiagonalMatrixExample_getRow
   TridiagonalMatrixExample_addElement
   TridiagonalMatrixExample_getElement
   TridiagonalMatrixExample_rowsReduction
   TridiagonalMatrixExample_allRowsReduction
   TridiagonalMatrixExample_forRows
   TridiagonalMatrixExample_forAllRows
   TridiagonalMatrixViewExample_getCompressedRowLengths
   TridiagonalMatrixViewExample_getConstRow
   TridiagonalMatrixViewExample_getRow
   TridiagonalMatrixViewExample_setElement
   TridiagonalMatrixViewExample_addElement
   TridiagonalMatrixViewExample_getElement
   TridiagonalMatrixViewExample_rowsReduction
   TridiagonalMatrixViewExample_allRowsReduction
   TridiagonalMatrixViewExample_forRows
   TridiagonalMatrixViewExample_forAllRows
)

# Collects the OUTPUT names of all custom commands so that the DEPENDS list of
# the custom target below cannot drift from the list of examples.
SET( TRIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS "" )

FOREACH( EXAMPLE ${TRIDIAGONAL_MATRIX_EXAMPLES} )
   IF( BUILD_CUDA )
      SET( EXAMPLE_TARGET ${EXAMPLE}_cuda )
      CUDA_ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cu )
   ELSE()
      SET( EXAMPLE_TARGET ${EXAMPLE} )
      ADD_EXECUTABLE( ${EXAMPLE_TARGET} ${EXAMPLE}.cpp )
   ENDIF()

   # Run the example and store what it prints as a documentation snippet.
   ADD_CUSTOM_COMMAND( COMMAND ${EXAMPLE_TARGET} >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${EXAMPLE}.out
                       OUTPUT ${EXAMPLE}.out )
   LIST( APPEND TRIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS ${EXAMPLE}.out )
ENDFOREACH()

# This example does not work with nvcc 10.1, so it is always built as a host
# executable from the .cpp source. Move it to TRIDIAGONAL_MATRIX_EXAMPLES when
# it works.
ADD_EXECUTABLE( TridiagonalMatrixExample_setElement TridiagonalMatrixExample_setElement.cpp )
ADD_CUSTOM_COMMAND( COMMAND TridiagonalMatrixExample_setElement >
                    ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TridiagonalMatrixExample_setElement.out
                    OUTPUT TridiagonalMatrixExample_setElement.out )
LIST( APPEND TRIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS TridiagonalMatrixExample_setElement.out )

ADD_CUSTOM_TARGET( RunTridiagonalMatricesExamples ALL DEPENDS
   ${TRIDIAGONAL_MATRIX_EXAMPLES_OUTPUTS}
)
-1 2 / + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + matrixSize, { + /*** + * To set the matrix elements we first extend the diagonals to their full + * lengths even outside the matrix (dots represent zeros and zeros are + * artificial zeros used for memory alignment): + * + * 0 / 2 -1 . . . . \ -> { 0, 2, -1 } + * | -1 2 -1 . . . | -> { -1, 2, -1 } + * | . -1 2 -1 . . | -> { -1, 2, -1 } + * | . . -1 2 -1 . | -> { -1, 2, -1 } + * | . . . -1 2 -1 | -> { -1, 2, -1 } + * \ . . . . -1 2 / 0 -> { -1, 2, 0 } + * + */ + { 0, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, 0 } + } ); + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating tridiagonal matrix on CPU ... " << std::endl; + createTridiagonalMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating tridiagonal matrix on CUDA GPU ... " << std::endl; + createTridiagonalMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu new file mode 120000 index 0000000000000000000000000000000000000000..5c6620a5d4f6c05bfee3ef8aaee8df692467940b --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_Constructor_init_list_1.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..66876f1e343430df0b0863119969e1702baba1b6 --- /dev/null +++ 
b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp @@ -0,0 +1,39 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize // number of columns + ); + for( int i = 0; i < matrixSize; i++ ) + matrix.setElement( i, i, i ); + + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < matrixSize; i++ ) + { + if( i > 0 ) + matrix.addElement( i, i - 1, 1.0, 5.0 ); + matrix.addElement( i, i, 1.0, 5.0 ); + if( i < matrixSize - 1 ) + matrix.addElement( i, i + 1, 1.0, 5.0 ); + } + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..be78eddc3fca4e9dadf0848f56ef93ca832362f6 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..043dcc82efc2203f8f9b52a4a90a70d7a1e25ae9 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp @@ 
-0,0 +1,78 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { { 0, 1, 3 }, // matrix elements + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 } } ); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..795c4febffbbb109b99553e8437c33db942bcab7 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ff3fdee91c080afd212718ddbf7159ab6f479164 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 
2 1 / 0 -> { 2, 1, 0 } + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5 ); // number of matrix columns + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + * + */ + value = 3 - localIdx; + }; + matrix.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..43736be3f83e86f2d7842191f76be12fb931e4a0 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ba17df51133e5f455f9e5d81af1d6e40a7e78fa --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp @@ -0,0 
+1,53 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5 ); // number of matrix columns + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 
2 1 / 0 -> { 2, 1, 0 } + * + */ + value = 3 - localIdx; + }; + matrix.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..a187b1e67da9619090be45c2ec69f6709bac9b88 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ebe40c5acaa56dcab44cb97465b127c5605ac592 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + const int gridSize( 6 ); + const int matrixSize = gridSize; + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize // number of columns + ); + matrix.setElements( { + { 0.0, 1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { 0.0, 1.0 } + } ); + TNL::Containers::Vector< int, Device > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + std::cout << "Laplace operator matrix: " << std::endl << 
matrix << std::endl; + std::cout << "Compressed row lengths: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..84d47b4466d3189b79ac8707818bed7a700b3c6b --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bac9303ae33ad662851b2038f31821fbb5404c56 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize = 5; + using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< MatrixType > matrix ( + matrixSize, // number of matrix rows + matrixSize // number of matrix columns + ); + matrix->setElements( + { { 0.0, 2.0, 1.0 }, + { 0.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 0.0, 2.0, 1.0 } } ); + + /*** + * Fetch lambda function returns diagonal element in each row. 
+ */ + auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double { + auto row = matrix->getRow( rowIdx ); + return row.getValue( 1 ); // get value with local index 1, i.e. the main diagonal (0 = below, 2 = above) + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use TridiagonalMatrixView. See + * TridiagonalMatrixView::getConstRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + + /*** + * Compute the matrix trace. + */ + int trace = TNL::Algorithms::Reduction< Device >::reduce( matrix->getRows(), std::plus<>{}, fetch, 0 ); + std::cout << "Matrix reads as: " << std::endl << *matrix << std::endl; + std::cout << "Matrix trace is: " << trace << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..5bfb460721f6b205e52cd07b0f0a2fc0e550cf8d --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5ba2bf302701d5bdb6b69dcab69130481f8410c1 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp @@ -0,0 +1,38 @@ 
+#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + matrixSize, // number of matrix columns + { // matrix elements definition + { 0.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, 0.0 } + } ); + + + for( int i = 0; i < matrixSize; i++ ) + { + for( int j = 0; j < matrixSize; j++ ) + std::cout << std::setw( 5 ) << matrix.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..a5089da5548b73dc67e4da9f1338eb963ef10d53 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfbdce919b381572b8d1ce4b3614298811518713 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize( 5 ); + using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< MatrixType 
> matrix( + matrixSize, // number of matrix rows + matrixSize // number of matrix columns + ); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + //auto row = matrix->getRow( rowIdx ); + // For some reason the previous line of code is not accepted by nvcc 10.1 + // so we replace it with the following two lines. + auto ref = matrix.modifyData(); + auto row = ref.getRow( rowIdx ); + + if( rowIdx > 0 ) + row.setElement( 0, -1.0 ); // elements below the diagonal + row.setElement( 1, 2.0 ); // elements on the diagonal + if( rowIdx < matrixSize - 1 ) // elements above the diagonal + row.setElement( 2, -1.0 ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use TridiagonalMatrixView. See + * TridiagonalMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + + /*** + * Set the matrix elements. + */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix->getRows(), f ); + std::cout << std::endl << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + // It seems that nvcc 10.1 does not handle lambda functions properly. + // It is hard to make nvcc to compile this example and it does not work + // properly. We will try it with later version of CUDA. 
+ //std::cout << "Getting matrix rows on CUDA device: " << std::endl; + //getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..3e31db140745bb4537a7bf618f46436d58e6a94e --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e76f87e1a2c048847d9fbd0d6d696eea8b6f401a --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp @@ -0,0 +1,23 @@ +#include +#include +#include + + +template< typename Device > +void getSerializationTypeExample() +{ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix; + + std::cout << "Matrix type is: " << matrix.getSerializationType() << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get serialization type on CPU ... " << std::endl; + getSerializationTypeExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get serialization type on CUDA GPU ... 
" << std::endl; + getSerializationTypeExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu new file mode 120000 index 0000000000000000000000000000000000000000..4feca977e13836256aa0a9af6243b3c42e4e72f3 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_getSerializationType.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..792dc98d386fa3797b2ee06c13262c1838359269 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { { 0, 1, 3 }, // matrix elements + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 } } ); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. 
+ */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + matrix.rowsReduction( 0, matrix.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..69a58007cbcbfcf49bf5678a9b208274cb6de91b --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..03121d41bb72fea74e33d0552557d4175517c424 --- /dev/null +++ 
b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + const int matrixSize( 5 ); + using Matrix = TNL::Matrices::TridiagonalMatrix< double, Device >; + TNL::Pointers::SharedPointer< Matrix > matrix( matrixSize, matrixSize ); + for( int i = 0; i < matrixSize; i++ ) + matrix->setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << *matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + if( i > 0 ) + matrix->setElement( i, i - 1, 1.0 ); + matrix->setElement( i, i, -i ); + if( i < matrixSize - 1 ) + matrix->setElement( i, i + 1, 1.0 ); + }; + + /*** + * For the case when Device is CUDA device we need to synchronize smart + * pointers. To avoid this you may use TridiagonalMatrixView. See + * TridiagonalMatrixView::getRow example for details. + */ + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << *matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Set elements on host:" << std::endl; + setElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Set elements on CUDA device:" << std::endl; + setElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..c693c5c23789abd25e3bc6857234c91f3c28f815 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_setElement.cpp \ No newline at end of file diff --git 
a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dcc6f3d2a06c45cf99fc4aec13b8239c5f6b810d --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void createTridiagonalMatrix() +{ + const int matrixSize = 6; + + /*** + * Setup the following matrix (dots represent zeros): + * + * / 2 -1 . . . . \ + * | -1 2 -1 . . . | + * | . -1 2 -1 . . | + * | . . -1 2 -1 . | + * | . . . -1 2 -1 | + * \ . . . . -1 2 / + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( matrixSize, matrixSize ); + matrix.setElements( { + /*** + * To set the matrix elements we first extend the diagonals to their full + * lengths even outside the matrix (dots represent zeros and zeros are + * artificial zeros used for memory alignment): + * + * 0 / 2 -1 . . . . \ -> { 0, 2, -1 } + * | -1 2 -1 . . . | -> { -1, 2, -1 } + * | . -1 2 -1 . . | -> { -1, 2, -1 } + * | . . -1 2 -1 . | -> { -1, 2, -1 } + * | . . . -1 2 -1 | -> { -1, 2, -1 } + * \ . . . . -1 2 / 0 -> { -1, 2, 0 } + * + */ + { 0, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, -1 }, + { -1, 2, 0 } + } ); + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating tridiagonal matrix on CPU ... " << std::endl; + createTridiagonalMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating tridiagonal matrix on CUDA GPU ... 
" << std::endl; + createTridiagonalMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu new file mode 120000 index 0000000000000000000000000000000000000000..4a08aa9268694d0210eb961b5944c1b7a09ea607 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cu @@ -0,0 +1 @@ +TridiagonalMatrixExample_setElements.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..183710215561b949f9aa625da3c179f0308548bc --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp @@ -0,0 +1,41 @@ +#include +#include +#include + +template< typename Device > +void addElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize // number of columns + ); + auto view = matrix.getView(); + + for( int i = 0; i < matrixSize; i++ ) + view.setElement( i, i, i ); + + std::cout << "Initial matrix is: " << std::endl << matrix << std::endl; + + for( int i = 0; i < matrixSize; i++ ) + { + if( i > 0 ) + view.addElement( i, i - 1, 1.0, 5.0 ); + view.addElement( i, i, 1.0, 5.0 ); + if( i < matrixSize - 1 ) + view.addElement( i, i + 1, 1.0, 5.0 ); + } + + std::cout << "Matrix after addition is: " << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Add elements on host:" << std::endl; + addElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Add elements on CUDA device:" << std::endl; + addElements< TNL::Devices::Cuda >(); 
+#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..9eb313e8130c1e8117e4abf19c1e710d5673b88f --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_addElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bacb98beecc1ac87be73302896e60ef8e498bc4d --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { { 0, 1, 3 }, // matrix elements + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 } } ); + auto view = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. 
+ */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda return maximum of given values. + */ + auto reduce = [=] __cuda_callable__ ( double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda store the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + view.allRowsReduction( fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..7b330650fc19e8db1bd2c844ef405e2b79f3a133 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_allRowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bd889e1aff317821702c00ff301f2fa7e81c1c19 --- /dev/null +++ 
b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5 ); // number of matrix columns + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 
2 1 / 0 -> { 2, 1, 0 } + * + */ + value = 3 - localIdx; + }; + view.forAllRows( f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..fae2028882fb518b3b8d879c8aa29bf49c7fe652 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_forAllRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24fe78f7f7f34a472e83ac3c060d9ba44171998b --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include + +template< typename Device > +void forRowsExample() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 
2 1 / 0 -> { 2, 1, 0 } + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + 5, // number of matrix rows + 5 ); // number of matrix columns + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx, int localIdx, int columnIdx, double& value, bool& compute ) { + /*** + * 'forRows' method iterates only over matrix elements lying on given subdiagonals + * and so we do not need to check anything. The element value can be expressed + * by the 'localIdx' variable, see the following figure: + * + * 0 1 2 <- localIdx values + * ------- + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 2 1 / 0 -> { 2, 1, 0 } + * + */ + value = 3 - localIdx; + }; + view.forRows( 0, matrix.getRows(), f ); + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating matrix on host: " << std::endl; + forRowsExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating matrix on CUDA device: " << std::endl; + forRowsExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu new file mode 120000 index 0000000000000000000000000000000000000000..ea70e5b9e29793bbfda1ea1eb88b61bfa141eb41 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_forRows.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f20811ea632c54d9e258b3b75e91e1c8547a2669 --- /dev/null +++ 
b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include + + +template< typename Device > +void laplaceOperatorMatrix() +{ + const int gridSize( 6 ); + const int matrixSize = gridSize; + TNL::Matrices::TridiagonalMatrix< double, Device > matrix( + matrixSize, // number of rows + matrixSize // number of columns + ); + matrix.setElements( { + { 0.0, 1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { 0.0, 1.0 } + } ); + auto view = matrix.getView(); + + TNL::Containers::Vector< int, Device > rowLengths; + view.getCompressedRowLengths( rowLengths ); + std::cout << "Laplace operator matrix: " << std::endl << matrix << std::endl; + std::cout << "Compressed row lengths: " << rowLengths << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Creating Laplace operator matrix on CPU ... " << std::endl; + laplaceOperatorMatrix< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Creating Laplace operator matrix on CUDA GPU ... 
" << std::endl; + laplaceOperatorMatrix< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu new file mode 120000 index 0000000000000000000000000000000000000000..c44da63b326aff9e126b11d05fdb001ed5ae0ae6 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_getCompressedRowLengths.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8e5f20793f81fd3783a1f2bba34141b3759bf121 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize = 5; + using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >; + MatrixType matrix ( + matrixSize, // number of matrix columns + { { 0.0, 2.0, 1.0 }, // matrix elements + { 0.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 3.0, 2.0, 1.0 }, + { 0.0, 2.0, 1.0 } } ); + auto view = matrix.getView(); + + /*** + * Fetch lambda function returns diagonal element in each row. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx ) mutable -> double { + auto row = view.getRow( rowIdx ); + return row.getValue( 1 ); // get value with local index 1, i.e. the main diagonal (0 = below, 2 = above) + }; + + /*** + * Compute the matrix trace. 
+ */ + double trace = TNL::Algorithms::Reduction< Device >::reduce( view.getRows(), std::plus<>{}, fetch, 0.0 ); // accumulate in double to match the fetch lambda's return type + std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Matrix trace is: " << trace << "." << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..c43061eb1c87c6e5f3edd83505f903972210c6fd --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_getConstRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5be0bd761023f7ce596fa36810a775e5312d73ea --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +template< typename Device > +void getElements() +{ + const int matrixSize( 5 ); + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + matrixSize, // number of matrix columns + { // matrix elements definition + { 0.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, -1.0 }, + { -1.0, 2.0, 0.0 } + } ); + auto view = matrix.getView(); + + for( int i = 0; i < matrixSize; i++ ) + { + for( int j = 0; j < matrixSize; j++ ) + std::cout 
<< std::setw( 5 ) << view.getElement( i, j ); + std::cout << std::endl; + } +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Get elements on host:" << std::endl; + getElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Get elements on CUDA device:" << std::endl; + getElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..70bf7122493bab7f4055a4a7128da10857440193 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_getElement.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp new file mode 100644 index 0000000000000000000000000000000000000000..641149e050ffe738576b63a73f1cc792457ebf56 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void getRowExample() +{ + const int matrixSize( 5 ); + using MatrixType = TNL::Matrices::TridiagonalMatrix< double, Device >; + MatrixType matrix( + matrixSize, // number of matrix rows + matrixSize // number of matrix columns + ); + auto view = matrix.getView(); + + auto f = [=] __cuda_callable__ ( int rowIdx ) mutable { + auto row = view.getRow( rowIdx ); + + if( rowIdx > 0 ) + row.setElement( 0, -1.0 ); // elements below the diagonal + row.setElement( 1, 2.0 ); // elements on the diagonal + if( rowIdx < matrixSize - 1 ) // elements above the diagonal + row.setElement( 2, -1.0 ); + }; + + /*** + * Set the matrix elements. 
+ */ + TNL::Algorithms::ParallelFor< Device >::exec( 0, view.getRows(), f ); + std::cout << std::endl << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Getting matrix rows on host: " << std::endl; + getRowExample< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Getting matrix rows on CUDA device: " << std::endl; + getRowExample< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu new file mode 120000 index 0000000000000000000000000000000000000000..12e2f392ccb0e630aab36a507c4ded137b221cf0 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_getRow.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa345292ce4aa5fc629225d910ca90a77bf6be07 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void rowsReduction() +{ + /*** + * Set the following matrix (dots represent zero matrix elements and zeros are + * padding zeros for memory alignment): + * + * 0 / 1 3 . . . \ -> { 0, 1, 3 } + * | 2 1 3 . . | -> { 2, 1, 3 } + * | . 2 1 3 . | -> { 2, 1, 3 } + * | . . 2 1 3 | -> { 2, 1, 3 } + * \ . . . 
2 1 / 0 -> { 2, 1, 0 } + * + */ + TNL::Matrices::TridiagonalMatrix< double, Device > matrix ( + 5, // number of matrix columns + { { 0, 1, 3 }, // matrix elements + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 3 }, + { 2, 1, 0 } } ); // trailing 0 is padding, as in the layout documented above + auto view = matrix.getView(); + + /*** + * Find largest element in each row. + */ + TNL::Containers::Vector< double, Device > rowMax( matrix.getRows() ); + + /*** + * Prepare vector view for lambdas. + */ + auto rowMaxView = rowMax.getView(); + + /*** + * Fetch lambda just returns absolute value of matrix elements. + */ + auto fetch = [=] __cuda_callable__ ( int rowIdx, int columnIdx, const double& value ) -> double { + return TNL::abs( value ); + }; + + /*** + * Reduce lambda returns the maximum of the given values. + */ + auto reduce = [=] __cuda_callable__ ( const double& a, const double& b ) -> double { + return TNL::max( a, b ); + }; + + /*** + * Keep lambda stores the largest value in each row to the vector rowMax. + */ + auto keep = [=] __cuda_callable__ ( int rowIdx, const double& value ) mutable { + rowMaxView[ rowIdx ] = value; + }; + + /*** + * Compute the largest values in each row. + */ + view.rowsReduction( 0, view.getRows(), fetch, reduce, keep, std::numeric_limits< double >::lowest() ); + + std::cout << "The matrix reads as: " << std::endl << matrix << std::endl; + std::cout << "Max. 
elements in rows are: " << rowMax << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Rows reduction on host:" << std::endl; + rowsReduction< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Rows reduction on CUDA device:" << std::endl; + rowsReduction< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu new file mode 120000 index 0000000000000000000000000000000000000000..f749c1ef47ec34830d51be3e3dd39d32347b3c8f --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_rowsReduction.cpp \ No newline at end of file diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a424904142b2fcc26f499bd3286ba03c6a58dba2 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +template< typename Device > +void setElements() +{ + const int matrixSize( 5 ); + using Matrix = TNL::Matrices::TridiagonalMatrix< double, Device >; + Matrix matrix( matrixSize, matrixSize ); + auto view = matrix.getView(); + for( int i = 0; i < matrixSize; i++ ) // iterate over all rows; bound follows matrixSize instead of a literal + view.setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + if( i > 0 ) + view.setElement( i, i - 1, 1.0 ); + view.setElement( i, i, -i ); + if( i < matrixSize - 1 ) + view.setElement( i, i + 1, 1.0 ); + }; + + TNL::Algorithms::ParallelFor< Device >::exec( 0, matrixSize, f ); + + 
std::cout << "Matrix set from its native device:" << std::endl; + std::cout << matrix << std::endl; +} + +int main( int argc, char* argv[] ) +{ + std::cout << "Set elements on host:" << std::endl; + setElements< TNL::Devices::Host >(); + +#ifdef HAVE_CUDA + std::cout << "Set elements on CUDA device:" << std::endl; + setElements< TNL::Devices::Cuda >(); +#endif +} diff --git a/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu new file mode 120000 index 0000000000000000000000000000000000000000..9510661c16a7340e6fd1f51958f2c35f280cd177 --- /dev/null +++ b/Documentation/Examples/Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cu @@ -0,0 +1 @@ +TridiagonalMatrixViewExample_setElement.cpp \ No newline at end of file diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h index a066b461ef018232023873a4787948a4fb3aba60..91db24d0187ea588d111d49beb5370c3e27fe24b 100644 --- a/src/Benchmarks/SpMV/spmv-legacy.h +++ b/src/Benchmarks/SpMV/spmv-legacy.h @@ -158,12 +158,12 @@ benchmarkSpMV( Benchmark& benchmark, benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, - { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, + { "non-zeros", convertToString( hostMatrix.getNonzeroElementsCount() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, { "matrix format", MatrixInfo< HostMatrix >::getFormat() } } )); - const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); + const int elements = hostMatrix.getNonzeroElementsCount(); const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; benchmark.setOperation( datasetSize ); diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 
9bcd45d08605c991533bede42836185264e10749..034c4560b65a6c47b5282323a20643c57770cdd6 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -88,9 +88,6 @@ class CSR __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 685f6ef54a6d7ad90ec69e5d45d83d78c0e1f337..0ba034d5b24dbee0c9f7cf798ae9e51a8b5d4ffb 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -170,16 +170,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return offsets[ segmentIdx ] + localIdx; } -template< typename Device, - typename Index, - typename IndexAllocator > -__cuda_callable__ -void -CSR< Device, Index, IndexAllocator >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index, typename IndexAllocator > diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index b01e6c66d7e9ff6c6022e3b88ece898940616fa9..f915a2e38029811c19fbac68c06fa83312101b7e 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -87,9 +87,6 @@ class CSRView __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 
7599327d1085f40a3cbedb24b297dc8300f202fa..e6f840d6b20d04526f274ff4418299a8689389be 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -151,15 +151,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return offsets[ segmentIdx ] + localIdx; } -template< typename Device, - typename Index > -__cuda_callable__ -void -CSRView< Device, Index >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index > __cuda_callable__ diff --git a/src/TNL/Containers/Segments/ElementsOrganization.h b/src/TNL/Containers/Segments/ElementsOrganization.h index 3c4086cd20ce7463f69f1aba12dff1280e07824f..d930f57f9b8dc18748b861acd743ec0c3672e560 100644 --- a/src/TNL/Containers/Segments/ElementsOrganization.h +++ b/src/TNL/Containers/Segments/ElementsOrganization.h @@ -26,7 +26,15 @@ struct DefaultElementsOrganization return ColumnMajorOrder; }; }; - } // namespace Segments } // namespace Containers + +inline String getSerializationType( Containers::Segments::ElementsOrganization Organization ) // inline: defined in a header, so it must be safe to include from several translation units +{ + if( Organization == Containers::Segments::RowMajorOrder ) + return String( "RowMajorOrder" ); + else + return String( "ColumnMajorOrder" ); +} + } // namespace TNL diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index f5c8490dec78480ca0a62d87a033702b6dbd4e25..8ef79f6d07981ca6a30c2778b06f3e2f0e77ba4e 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -85,9 +85,6 @@ class Ellpack __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/Ellpack.hpp 
b/src/TNL/Containers/Segments/Ellpack.hpp index 922a75765ae3920072712016f498847d57105eae..91e06e1108b52935320ddba634c9aa344df3ba24 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -233,17 +233,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return segmentIdx + this->alignedSize * localIdx; } -template< typename Device, - typename Index, - typename IndexAllocator, - ElementsOrganization Organization, - int Alignment > -__cuda_callable__ -void Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 3559949d890da4dfcd43d5c3fc3852e55bec7175..1d77c7445b3b5f3792daebafca980bfa837929c7 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -81,9 +81,6 @@ class EllpackView __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index 0195424e99d2f9467565d58841ab90979edf041e..2d1d8090f035b92ec5ba7b226cfe26faddf11524 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -164,15 +164,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return segmentIdx + this->alignedSize * localIdx; } -template< typename Device, - typename Index, - ElementsOrganization Organization, - int Alignment > -__cuda_callable__ void EllpackView< 
Device, Index, Organization, Alignment >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index, ElementsOrganization Organization, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index c85e457424443604cd4954e6ff9a4513bd9f17ca..a0e5c81b0d7e92f109ccedacf27ef7a4454d5927 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -82,9 +82,6 @@ class SlicedEllpack __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 6a9bc4fe2ba888777c7ddf277e0504e2ce7dc986..a288bf7df800011ba3baeeb552e3e3e518b4494d 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -259,18 +259,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; } -template< typename Device, - typename Index, - typename IndexAllocator, - ElementsOrganization Organization, - int SliceSize > -__cuda_callable__ -void -SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 2a0fcb189a5b442831857378fb33c8c9f5f819be..5c6e0eeb26cf7e0c8586be1d7f61c2e39ce37af9 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ 
b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -83,9 +83,6 @@ class SlicedEllpackView __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - __cuda_callable__ SegmentViewType getSegmentView( const IndexType segmentIdx ) const; diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 7331181efdeaa346d02230dd807d87471619288f..34f24def68187abf603d38da8eadae43b8b3e26c 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -202,17 +202,6 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; } -template< typename Device, - typename Index, - ElementsOrganization Organization, - int SliceSize > -__cuda_callable__ -void -SlicedEllpackView< Device, Index, Organization, SliceSize >:: -getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const -{ -} - template< typename Device, typename Index, ElementsOrganization Organization, diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index 392bdc6179eac3a28ad9bb9ffbd27a4ca287d418..736cd3bcf77820d36c70fa9f9c02f00cfc753886 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -35,7 +35,7 @@ template< typename Real = double, typename Index = int, ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > -class DenseMatrix : public Matrix< Real, Device, Index > +class DenseMatrix : public Matrix< Real, Device, Index, RealAllocator > { protected: using BaseType = Matrix< Real, Device, Index, RealAllocator >; @@ 
-88,7 +88,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; /** - * \brief Type for accessing matrix row. + * \brief Type for accessing matrix rows. */ using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; @@ -103,17 +103,35 @@ class DenseMatrix : public Matrix< Real, Device, Index > using Self = DenseMatrix< _Real, _Device, _Index, _Organization, _RealAllocator >; /** - * \brief Constructor without parameters. + * \brief Constructor only with values allocator. + * + * \param allocator is used for allocation of matrix elements values. + */ + DenseMatrix( const RealAllocatorType& allocator = RealAllocatorType() ); + + /** + * \brief Copy constructor. + * + * \param matrix is the source matrix + */ + DenseMatrix( const DenseMatrix& matrix ) = default; + + /** + * \brief Move constructor. + * + * \param matrix is the source matrix */ - DenseMatrix(); + DenseMatrix( DenseMatrix&& matrix ) = default; /** * \brief Constructor with matrix dimensions. * * \param rows is number of matrix rows. * \param columns is number of matrix columns. + * \param allocator is used for allocation of matrix elements values. */ - DenseMatrix( const IndexType rows, const IndexType columns ); + DenseMatrix( const IndexType rows, const IndexType columns, + const RealAllocatorType& allocator = RealAllocatorType() ); /** * \brief Constructor with 2D initializer list. @@ -124,14 +142,16 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \param data is a initializer list of initializer lists representing * list of matrix rows. + * \param allocator is used for allocation of matrix elements values. 
* * \par Example - * \include Matrices/DenseMatrixExample_Constructor_init_list.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_Constructor_init_list.cpp * \par Output * \include DenseMatrixExample_Constructor_init_list.out */ template< typename Value > - DenseMatrix( std::initializer_list< std::initializer_list< Value > > data ); + DenseMatrix( std::initializer_list< std::initializer_list< Value > > data, + const RealAllocatorType& allocator = RealAllocatorType() ); /** * \brief Returns a modifiable view of the dense matrix. @@ -189,6 +209,15 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Matrix > void setLike( const Matrix& matrix ); + /** + * \brief This method is only for the compatibility with the sparse matrices. + * + * This method does nothing. In debug mode it contains assertions checking + * that given rowCapacities are compatible with the current matrix dimensions. + */ + template< typename RowCapacitiesVector > + void setRowCapacities( const RowCapacitiesVector& rowCapacities ); + /** * \brief This method recreates the dense matrix from 2D initializer list. * @@ -200,22 +229,13 @@ class DenseMatrix : public Matrix< Real, Device, Index > * list of matrix rows. * * \par Example - * \include Matrices/DenseMatrixExample_setElements.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_setElements.cpp * \par Output * \include DenseMatrixExample_setElements.out */ template< typename Value > void setElements( std::initializer_list< std::initializer_list< Value > > data ); - /** - * \brief This method is only for the compatibility with the sparse matrices. - * - * This method does nothing. In debug mode it contains assertions checking - * that given rowCapacities are compatible with the current matrix dimensions. - */ - template< typename RowCapacitiesVector > - void setRowCapacities( const RowCapacitiesVector& rowCapacities ); - /** * \brief Computes number of non-zeros in each row. 
* @@ -223,36 +243,20 @@ class DenseMatrix : public Matrix< Real, Device, Index > * will be stored. * * \par Example - * \include Matrices/DenseMatrixExample_getCompressedRowLengths.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_getCompressedRowLengths.cpp * \par Output * \include DenseMatrixExample_getCompressedRowLengths.out */ template< typename RowLengthsVector > void getCompressedRowLengths( RowLengthsVector& rowLengths ) const; - /** - * \brief Returns number of all matrix elements. - * - * This method is here mainly for compatibility with sparse matrices since - * the number of all matrix elements is just number of rows times number of - * columns. - * - * \return number of all matrix elements. - * - * \par Example - * \include Matrices/DenseMatrixExample_getElementsCount.cpp - * \par Output - * \include DenseMatrixExample_getElementsCount.out - */ - IndexType getElementsCount() const; - /** * \brief Returns number of non-zero matrix elements. * * \return number of all non-zero matrix elements. * * \par Example - * \include Matrices/DenseMatrixExample_getElementsCount.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_getElementsCount.cpp * \par Output * \include DenseMatrixExample_getElementsCount.out */ @@ -271,7 +275,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \return RowView for accessing given matrix row. * * \par Example - * \include Matrices/DenseMatrixExample_getConstRow.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_getConstRow.cpp * \par Output * \include DenseMatrixExample_getConstRow.out * @@ -288,7 +292,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \return RowView for accessing given matrix row. 
* * \par Example - * \include Matrices/DenseMatrixExample_getRow.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_getRow.cpp * \par Output * \include DenseMatrixExample_getRow.out * @@ -349,7 +353,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param value is the value the element will be set to. * * \par Example - * \include Matrices/DenseMatrixExample_setElement.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_setElement.cpp * \par Output * \include DenseMatrixExample_setElement.out */ @@ -373,6 +377,12 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param value is the value the element will be set to. * \param thisElementMultiplicator is multiplicator the original matrix element * value is multiplied by before addition of given \e value. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixExample_addElement.cpp + * \par Output + * \include DenseMatrixExample_addElement.out + * */ __cuda_callable__ void addElement( const IndexType row, @@ -394,6 +404,12 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param column i a column index of the matrix element. * * \return value of given matrix element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixExample_getElement.cpp + * \par Output + * \include DenseMatrixExample_getElement.out + * */ __cuda_callable__ Real getElement( const IndexType row, @@ -411,20 +427,47 @@ class DenseMatrix : public Matrix< Real, Device, Index > * It is declared as `keep( const IndexType rowIdx, const double& value )`. * \tparam FetchValue is type returned by the Fetch lambda function. * - * \param first is an index of the first row the reduction will be performed on. - * \param last is an index of the row after the last row the reduction will be performed on. + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. 
+ * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp + * \par Output + * \include DenseMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ); + + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is the type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. * \param fetch is an instance of lambda function for data fetch. * \param reduce is an instance of lambda function for reduction. * \param keep in an instance of lambda function for storing results. * \param zero is zero of given reduction operation also known as idempotent element. 
* * \par Example - * \include Matrices/DenseMatrixExample_rowsReduction.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_rowsReduction.cpp * \par Output * \include DenseMatrixExample_rowsReduction.out */ template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const; + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const; /** * \brief Method for performing general reduction on ALL matrix rows. @@ -444,7 +487,32 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param zero is zero of given reduction operation also known as idempotent element. * * \par Example - * \include Matrices/DenseMatrixExample_allRowsReduction.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp + * \par Output + * \include DenseMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on ALL matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. 
+ * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixExample_allRowsReduction.cpp * \par Output * \include DenseMatrixExample_allRowsReduction.out */ @@ -456,47 +524,47 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \tparam Function is type of lambda function that will operate on matrix elements. * It is should have form like - * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, const RealType& value, bool& compute )`. * The column index repeats twice only for compatibility with sparse matrices. * If the 'compute' variable is set to false the iteration over the row can * be interrupted. * - * \param first is index is the first row to be processed. - * \param last is index of the row after the last row to be processed. + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. * \param function is an instance of the lambda function to be called in each row. * * \par Example - * \include Matrices/DenseMatrixExample_forRows.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp * \par Output * \include DenseMatrixExample_forRows.out */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; + void forRows( IndexType begin, IndexType end, Function& function ) const; /** * \brief Method for iteration over all matrix rows for non-constant instances. * * \tparam Function is type of lambda function that will operate on matrix elements. 
* It is should have form like - * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx, RealType& value, bool& compute )`. + * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value, bool& compute )`. * The column index repeats twice only for compatibility with sparse matrices. * If the 'compute' variable is set to false the iteration over the row can * be interrupted. * - * \param first is index is the first row to be processed. - * \param last is index of the row after the last row to be processed. + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. * \param function is an instance of the lambda function to be called in each row. * * \par Example - * \include Matrices/DenseMatrixExample_forRows.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_forRows.cpp * \par Output * \include DenseMatrixExample_forRows.out */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); + void forRows( IndexType begin, IndexType end, Function& function ); /** - * \brief This method calls \e forRows for all matrix rows. + * \brief This method calls \e forRows for all matrix rows (for constant instances). * * See \ref DenseMatrix::forRows. * @@ -504,7 +572,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param function is an instance of the lambda function to be called in each row. * * \par Example - * \include Matrices/DenseMatrixExample_forAllRows.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp * \par Output * \include DenseMatrixExample_forAllRows.out */ @@ -514,13 +582,13 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief This method calls \e forRows for all matrix rows. * - * See \ref DenseMatrix::forRows. + * See \ref DenseMatrix::forAllRows. 
* * \tparam Function is a type of lambda function that will operate on matrix elements. * \param function is an instance of the lambda function to be called in each row. * * \par Example - * \include Matrices/DenseMatrixExample_forAllRows.cpp + * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp * \par Output * \include DenseMatrixExample_forAllRows.out */ @@ -532,7 +600,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * More precisely, it computes: * - * outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector. + * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector` * * \tparam InVector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. @@ -579,7 +647,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > const RealType& omega = 1.0 ) const; /** - * \brief Assignment operator for exactly the same type of the dense matrix. + * \brief Assignment operator with exactly the same type of the dense matrix. * * \param matrix is the right-hand side matrix. * \return reference to this matrix. @@ -587,7 +655,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > DenseMatrix& operator=( const DenseMatrix& matrix ); /** - * \brief Assignment operator for other dense matrices. + * \brief Assignment operator with other dense matrices. * * \param matrix is the right-hand side matrix. * \return reference to this matrix. @@ -597,7 +665,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > DenseMatrix& operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHSRealAllocator >& matrix ); /** - * \brief Assignment operator for other (sparse) types of matrices. + * \brief Assignment operator with other (sparse) types of matrices. * * \param matrix is the right-hand side matrix. * \return reference to this matrix. 
diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index 2b2fcc996fdc52b0f6530cf35c95a53c72ea276f..844fe576b4d67ba4b7b6de994b49103b4d57e9b1 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -22,7 +22,9 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -DenseMatrix< Real, Device, Index, Organization, RealAllocator >::DenseMatrix() +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +DenseMatrix( const RealAllocatorType& allocator ) +: Matrix< Real, Device, Index, RealAllocator >( allocator ) { } @@ -32,7 +34,9 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: -DenseMatrix( const IndexType rows, const IndexType columns ) +DenseMatrix( const IndexType rows, const IndexType columns, + const RealAllocatorType& allocator ) +: Matrix< Real, Device, Index, RealAllocator >( allocator ) { this->setDimensions( rows, columns ); } @@ -44,7 +48,9 @@ template< typename Real, typename RealAllocator > template< typename Value > DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: -DenseMatrix( std::initializer_list< std::initializer_list< Value > > data ) +DenseMatrix( std::initializer_list< std::initializer_list< Value > > data, + const RealAllocatorType& allocator ) +: Matrix< Real, Device, Index, RealAllocator >( allocator ) { this->setElements( data ); } @@ -199,18 +205,6 @@ getCompressedRowLengths( RowLengthsVector& rowLengths ) const this->view.getCompressedRowLengths( rowLengths ); } -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization, - typename RealAllocator > -Index -DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: -getElementsCount() const -{ - return this->getRows() * this->getColumns(); -} - template< typename Real, typename Device, typename Index, @@ 
-346,9 +340,35 @@ template< typename Real, template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) { - this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); + this->view.rowsReduction( begin, end, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + this->view.rowsReduction( begin, end, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -372,9 +392,9 @@ template< typename Real, template< typename Function > void DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: -forRows( IndexType first, IndexType last, Function& function ) const +forRows( IndexType begin, IndexType end, Function& function ) const { - this->view.forRows( first, last, function ); + this->view.forRows( begin, end, function ); } template< typename Real, diff --git 
a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h index 78fecd0f7dc06572c62cc089d0e24fd180baf916..996c5a6e7e7c7351ad927eb50170b36a06508799 100644 --- a/src/TNL/Matrices/DenseMatrixRowView.h +++ b/src/TNL/Matrices/DenseMatrixRowView.h @@ -25,6 +25,11 @@ namespace TNL { * \include Matrices/DenseMatrixExample_getRow.cpp * \par Output * \include DenseMatrixExample_getRow.out + * + * \par Example + * \include Matrices/DenseMatrixViewExample_getRow.cpp + * \par Output + * \include DenseMatrixViewExample_getRow.out */ template< typename SegmentView, typename ValuesView > diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index e09253cb23136a3a265a8f06dc4181fdefafdb52..b0c50fd56ad98efc28c2ec752c57f87c6142ac7b 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -119,6 +119,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > * \param rows number of matrix rows. * \param columns number of matrix columns. * \param values is vector view with matrix elements values. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_constructor.cpp + * \par Output + * \include DenseMatrixViewExample_constructor.out + */ __cuda_callable__ DenseMatrixView( const IndexType rows, @@ -149,72 +155,416 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ ConstViewType getConstView() const; + /** + * \brief Returns string with serialization type. + * + * The string has a form \e `Matrices::DenseMatrix< RealType, [any_device], IndexType, [any_allocator], true/false >`. + * + * \return \e String with the serialization type. + */ static String getSerializationType(); + /** + * \brief Returns string with serialization type. + * + * See \ref DenseMatrixView::getSerializationType. + * + * \return \e String with the serialization type. 
+ */ virtual String getSerializationTypeVirtual() const; + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getCompressedRowLengths.cpp + * \par Output + * \include DenseMatrixViewExample_getCompressedRowLengths.out + */ template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; - - IndexType getMaxRowLength() const; - - IndexType getElementsCount() const; + /** + * \brief Returns number of all matrix elements. + * + * This method is here mainly for compatibility with sparse matrices since + * the number of all matrix elements is just number of rows times number of + * columns. + * + * \return number of all matrix elements. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp + * \par Output + * \include DenseMatrixViewExample_getElementsCount.out + */ + IndexType getAllocatedElementsCount() const; + /** + * \brief Returns number of non-zero matrix elements. + * + * \return number of all non-zero matrix elements. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElementsCount.cpp + * \par Output + * \include DenseMatrixViewExample_getElementsCount.out + */ IndexType getNonzeroElementsCount() const; + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getConstRow.cpp + * \par Output + * \include DenseMatrixViewExample_getConstRow.out + * + * See \ref DenseMatrixRowView. 
+ */ __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getRow.cpp + * \par Output + * \include DenseMatrixExample_getRow.out + * + * See \ref DenseMatrixRowView. + */ __cuda_callable__ RowView getRow( const IndexType& rowIdx ); - + /** + * \brief Sets all matrix elements to value \e v. + * + * \param v is value all matrix elements will be set to. + */ void setValue( const RealType& v ); + /** + * \brief Returns non-constant reference to element at row \e row and column \e column. + * + * Since this method returns reference to the element, it cannot be called across + * different address spaces. It means that it can be called only from CPU if the matrix + * is allocated on CPU or only from GPU kernels if the matrix is allocated on GPU. + * + * \param row is a row index of the element. + * \param column is a column index of the element. + * \return reference to given matrix element. + */ __cuda_callable__ Real& operator()( const IndexType row, const IndexType column ); + /** + * \brief Returns constant reference to element at row \e row and column \e column. + * + * Since this method returns reference to the element, it cannot be called across + * different address spaces. It means that it can be called only from CPU if the matrix + * is allocated on CPU or only from GPU kernels if the matrix is allocated on GPU. + * + * \param row is a row index of the element. + * \param column is a column index of the element. + * \return reference to given matrix element. + */ __cuda_callable__ const Real& operator()( const IndexType row, const IndexType column ) const; + /** + * \brief Sets element at given \e row and \e column to given \e value.
+ * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on the GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref DenseMatrix::getRow + * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_setElement.cpp + * \par Output + * \include DenseMatrixExample_setElement.out + */ __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + /** + * \brief Adds given \e value to the element at given \e row and \e column. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on the GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref DenseMatrix::getRow + * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value to be added to the element. + * \param thisElementMultiplicator is multiplicator the original matrix element + * value is multiplied by before addition of given \e value.
+ * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_addElement.cpp + * \par Output + * \include DenseMatrixExample_addElement.out + * + */ __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on the GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref DenseMatrix::getRow + * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_getElement.cpp + * \par Output + * \include DenseMatrixExample_getElement.out + * + */ __cuda_callable__ Real getElement( const IndexType row, const IndexType column ) const; + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`.
+ * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp + * \par Output + * \include DenseMatrixViewExample_rowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. 
+ * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_rowsReduction.cpp + * \par Output + * \include DenseMatrixViewExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for performing general reduction on ALL matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include DenseMatrixViewExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on ALL matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include DenseMatrixViewExample_allRowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. 
+ * It should have a form like + * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, const RealType& value, bool& compute )`. + * The column index repeats twice only for compatibility with sparse matrices. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp + * \par Output + * \include DenseMatrixViewExample_forRows.out + */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; + void forRows( IndexType begin, IndexType end, Function& function ) const; + /** + * \brief Method for iteration over all matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType columnIdx, IndexType columnIdx_, RealType& value, bool& compute )`. + * The column index repeats twice only for compatibility with sparse matrices. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row.
+ * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_forRows.cpp + * \par Output + * \include DenseMatrixViewExample_forRows.out + */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); + void forRows( IndexType begin, IndexType end, Function& function ); + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref DenseMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixViewExample_forAllRows.cpp + * \par Output + * \include DenseMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ) const; + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref DenseMatrix::forAllRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/DenseMatrix/DenseMatrixExample_forAllRows.cpp + * \par Output + * \include DenseMatrixExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ); + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArraView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArraView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. 
It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before being added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default. + */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector, @@ -244,12 +594,37 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; + /** + * \brief Assignment operator with DenseMatrixView. + * + * \param matrix is the right-hand side matrix. + * \return reference to this matrix. + */ DenseMatrixView& operator=( const DenseMatrixView& matrix ); + /** + * \brief Method for saving the matrix view to the file with given filename. + * + * The output file can be loaded by \ref DenseMatrix. + * + * \param fileName is name of the file. + */ void save( const String& fileName ) const; + /** + * \brief Method for saving the matrix view to a file. + * + * The output file can be loaded by \ref DenseMatrix. + * + * \param file is the file the matrix view is saved to. + */ void save( File& file ) const; + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream.
+ */ void print( std::ostream& str ) const; protected: @@ -258,9 +633,6 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getElementIndex( const IndexType row, const IndexType column ) const; - //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; - //friend class DenseDeviceDependentCode< DeviceType >; - SegmentsViewType segments; }; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 2ba34e549ddf2219a0b723699f62afd7451b67f8..b96a8475b12efb0ba16acec67944e1a950a32065 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -40,6 +40,13 @@ DenseMatrixView( const IndexType rows, const ValuesViewType& values ) : MatrixView< Real, Device, Index >( rows, columns, values ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_EQ( values.getSize(), this->getAllocatedElementsCount(), "Number of matrix elements does not agree with matrix dimensions." ); +#else + if( values.getSize() != this->getAllocatedElementsCount() ) + throw( std::logic_error( "Number of matrix elements does not agree with matrix dimensions." ) ); +#endif + SegmentsType a( rows, columns ); segments = a.getView(); } @@ -82,9 +89,9 @@ DenseMatrixView< Real, Device, Index, Organization >:: getSerializationType() { return String( "Matrices::DenseMatrix< " ) + - TNL::getSerializationType< RealType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + ", " + - ( Organization ? 
"true" : "false" ) + ", [any_allocator] >"; + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + TNL::getSerializationType( Organization ) + " >"; } template< typename Real, @@ -125,29 +132,7 @@ template< typename Real, ElementsOrganization Organization > Index DenseMatrixView< Real, Device, Index, Organization >:: -getRowLength( const IndexType row ) const -{ - return this->getColumns(); -} - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -DenseMatrixView< Real, Device, Index, Organization >:: -getMaxRowLength() const -{ - return this->getColumns(); -} - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -DenseMatrixView< Real, Device, Index, Organization >:: -getElementsCount() const +getAllocatedElementsCount() const { return this->getRows() * this->getColumns(); } @@ -286,14 +271,43 @@ template< typename Real, template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void DenseMatrixView< Real, Device, Index, Organization >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) +{ + auto values_view = this->values.getView(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void 
+DenseMatrixView< Real, Device, Index, Organization >:: +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto values_view = this->values.getConstView(); auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); return zero; }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +DenseMatrixView< Real, Device, Index, Organization >:: +allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -315,14 +329,13 @@ template< typename Real, template< typename Function > void DenseMatrixView< Real, Device, Index, Organization >:: -forRows( IndexType first, IndexType last, Function& function ) const +forRows( IndexType begin, IndexType end, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute ); }; - this->segments.forSegments( first, last, f ); - + this->segments.forSegments( begin, end, f ); } template< typename Real, @@ -332,14 +345,13 @@ template< typename Real, template< typename Function > void DenseMatrixView< Real, Device, Index, Organization >:: -forRows( IndexType first, IndexType last, Function& function ) +forRows( IndexType begin, IndexType end, 
Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute ); }; - this->segments.forSegments( first, last, f ); - + this->segments.forSegments( begin, end, f ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseRow.h b/src/TNL/Matrices/DenseRow.h deleted file mode 100644 index a1d6d2124b1df9d7da0dcc98b26aa83431bfeaa9..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/DenseRow.h +++ /dev/null @@ -1,50 +0,0 @@ -/*************************************************************************** - DenseRow.h - description - ------------------- - begin : Dec 24, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -class DenseRow -{ - public: - - __cuda_callable__ - DenseRow(); - - __cuda_callable__ - DenseRow( Real* values, - const Index columns, - const Index step ); - - __cuda_callable__ - void bind( Real* values, - const Index columns, - const Index step ); - - __cuda_callable__ - void setElement( const Index& elementIndex, - const Index& column, - const Real& value ); - - protected: - - Real* values; - - Index columns, step; -}; - -} // namespace Matrices -} // namespace TNL - -#include - diff --git a/src/TNL/Matrices/DenseRow_impl.h b/src/TNL/Matrices/DenseRow_impl.h deleted file mode 100644 index 7b1bac1a5bdc5074b5f22b4d3b5d86046e605011..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/DenseRow_impl.h +++ /dev/null @@ -1,68 +0,0 @@ -/*************************************************************************** - DenseRow_impl.h - description - ------------------- - begin : Dec 24, 
2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -__cuda_callable__ -DenseRow< Real, Index >:: -DenseRow() -: values( 0 ), - columns( 0 ), - step( 0 ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -DenseRow< Real, Index >:: -DenseRow( Real* values, - const Index columns, - const Index step ) -: values( values ), - columns( columns ), - step( step ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -void -DenseRow< Real, Index >:: -bind( Real* values, - const Index columns, - const Index step ) -{ - this->values = values; - this->columns = columns; - this->step = step; -} - -template< typename Real, typename Index > -__cuda_callable__ -void -DenseRow< Real, Index >:: -setElement( const Index& elementIndex, - const Index& column, - const Real& value ) -{ - TNL_ASSERT( this->values, ); - TNL_ASSERT( this->step > 0,); - TNL_ASSERT( column >= 0 && column < this->columns, - std::cerr << "column = " << column << " this->columns = " << this->columns ); - - this->values[ column * this->step ] = value; -} - -} // namespace Matrices -} // namespace TNL diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index 4785a6790978b8229ca1207ee6f9cd7b71a32103..83c1bd682c719f992ca06b6f3f2fdf17942ab73d 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -218,7 +218,7 @@ public: protected: // communication pattern - Matrices::DenseMatrix< IndexType, Devices::Host, int, Containers::Segments::RowMajorOrder, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds; + Matrices::DenseMatrix< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds; // span of rows with only block-diagonal entries 
std::pair< IndexType, IndexType > localOnlySpan; diff --git a/src/TNL/Matrices/LambdaMatrix.h b/src/TNL/Matrices/LambdaMatrix.h index 5f3ecdfb36bc447f44578c4c0493ae635b1e7cd4..1692510e70ecfeb0aa3e5365d7535db47c221599 100644 --- a/src/TNL/Matrices/LambdaMatrix.h +++ b/src/TNL/Matrices/LambdaMatrix.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Matrices { @@ -19,87 +19,292 @@ namespace Matrices { /** * \brief "Matrix-free" matrix based on lambda functions. * - * \tparam MatrixElementsLambda is a lambda function returning matrix elements - * values and positions. - * \tparam CompressedRowLengthsLambda is a lambda function returning a number - * of non-zero elements in each row. + * The elements of this matrix are not stored explicitly in memory but + * implicitly in the form of lambda functions. + * + * \tparam MatrixElementsLambda is a lambda function returning matrix elements values and positions. + * + * It has the following form: + * + * `matrixElements( IndexType rows, IndexType columns, IndexType row, IndexType localIdx, IndexType& elementColumn, RealType& elementValue )` + * + * where \e rows is the number of matrix rows, \e columns is the number of matrix columns, \e row is the index of matrix row being queried, + * \e localIdx is the rank of the non-zero element in given row, \e elementColumn is a column index of the matrix element computed by + * this lambda and \e elementValue is a value of the matrix element computed by this lambda. + * \tparam CompressedRowLengthsLambda is a lambda function returning a number of non-zero elements in each row. + * + * It has the following form: + * + * `rowLengths( IndexType rows, IndexType columns, IndexType row ) -> IndexType` + * + * where \e rows is the number of matrix rows, \e columns is the number of matrix columns and \e row is an index of the row being queried. + * + * \tparam Real is a type of matrix elements values. 
- * \tparam Device is a device on which the lambda functions can evaluated. - * Devices::AnyDevice can be used for lambdas with no restriction. + * \tparam Device is a device on which the lambda functions will be evaluated. * \ẗparam Index is a type used for indexing. */ template< typename MatrixElementsLambda, typename CompressedRowLengthsLambda, typename Real = double, - typename Device = Devices::AnyDevice, + typename Device = Devices::Host, typename Index = int > class LambdaMatrix { public: - static constexpr bool isSymmetric() { return false; }; - static constexpr bool isBinary() { return false; }; + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ using IndexType = Index; + static constexpr bool isSymmetric() { return false; }; + static constexpr bool isBinary() { return false; }; + + /** + * \brief Constructor with lambda functions defining the matrix elements. + * + * Note: It might be difficult to express the types of the lambdas. For easier creation of + * \e LambdaMatrix you may use \ref LambdaMatrixFactory. + * + * \param matrixElements is a lambda function giving matrix elements position and value. + * \param compressedRowLentghs is a lambda function returning how many non-zero matrix elements are in given row. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp + * \par Output + * \include LambdaMatrixExample_Constructor.out + */ LambdaMatrix( MatrixElementsLambda& matrixElements, CompressedRowLengthsLambda& compressedRowLentghs ); + /** + * \brief Constructor with matrix dimensions and lambda functions defining the matrix elements. + * + * Note: It might be difficult to express the types of the lambdas. For easier creation of + * \e LambdaMatrix you may use \ref LambdaMatrixFactory. 
+ * + * \param rows is a number of the matrix rows. + * \param columns is a number of the matrix columns. + * \param matrixElements is a lambda function giving matrix elements position and value. + * \param compressedRowLentghs is a lambda function returning how many non-zero matrix elements are in given row. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp + * \par Output + * \include LambdaMatrixExample_Constructor.out + */ LambdaMatrix( const IndexType& rows, const IndexType& columns, MatrixElementsLambda& matrixElements, CompressedRowLengthsLambda& compressedRowLentghs ); + /** + * \brief Copy constructor. + * + * \param matrix is input matrix. + */ + LambdaMatrix( const LambdaMatrix& matrix ) = default; + + /** + * \brief Move constructor. + * + * \param matrix is input matrix. + */ + LambdaMatrix( LambdaMatrix&& matrix ) = default; + + /** + * \brief Set number of rows and columns of this matrix. + * + * \param rows is the number of matrix rows. + * \param columns is the number of matrix columns. + */ void setDimensions( const IndexType& rows, const IndexType& columns ); + /** + * \brief Returns a number of matrix rows. + * + * \return number of matrix rows. + */ __cuda_callable__ IndexType getRows() const; + /** + * \brief Returns a number of matrix columns. + * + * \return number of matrix columns. + */ __cuda_callable__ IndexType getColumns() const; - template< typename Vector > - void getCompressedRowLengths( Vector& rowLengths ) const; - - IndexType getNumberOfNonzeroMatrixElements() const; - + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. 
+ * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_getCompressedRowLengths.cpp + * \par Output + * \include LambdaMatrixExample_getCompressedRowLengths.out + */ + template< typename RowLentghsVector > + void getCompressedRowLengths( RowLentghsVector& rowLengths ) const; + + /** + * \brief Returns number of non-zero matrix elements. + * + * \return number of all non-zero matrix elements. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_getElementsCount.cpp + * \par Output + * \include LambdaMatrixExample_getElementsCount.out + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + */ RealType getElement( const IndexType row, const IndexType column ) const; - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - - /*** - * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. 
+ * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_rowsReduction.cpp + * \par Output + * \include LambdaMatrixExample_rowsReduction.out */ - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector, - const RealType& matrixMultiplicator = 1.0, - const RealType& outVectorMultiplicator = 0.0 ) const; - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for performing general reduction on ALL matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType columnIdx, RealType elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. 
+ * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_allRowsReduction.cpp + * \par Output + * \include LambdaMatrixExample_allRowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have the form + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * Here \e localIdx is the rank of the non-zero element in the given row and \e columnIdx is its column index. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_forRows.cpp + * \par Output + * \include LambdaMatrixExample_forRows.out + */ template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); - + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances). + * + * See \ref LambdaMatrix::forRows. 
+ * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_forAllRows.cpp + * \par Output + * \include LambdaMatrixExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ) const; - template< typename Function > - void forAllRows( Function& function ); + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before being added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default. 
+ */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0, + const IndexType begin = 0, + IndexType end = 0 ) const; + template< typename Vector1, typename Vector2 > void performSORIteration( const Vector1& b, @@ -107,6 +312,11 @@ class LambdaMatrix Vector2& x, const RealType& omega = 1.0 ) const; + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. + */ void print( std::ostream& str ) const; protected: @@ -118,21 +328,49 @@ class LambdaMatrix CompressedRowLengthsLambda compressedRowLengthsLambda; }; +/** + * \brief Insertion operator for lambda matrix and output stream. + * + * \param str is the output stream. + * \param matrix is the lambda matrix. + * \return reference to the stream. + */ +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +std::ostream& operator<< ( std::ostream& str, const LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >& matrix ); /** * \brief Helper class for creating instances of LambdaMatrix. - * @param matrixElementsLambda - * @param compressedRowLengthsLambda - * @return + * + * See \ref LambdaMatrix. + * + * \param matrixElementsLambda + * \param compressedRowLengthsLambda */ template< typename Real = double, - typename Device = Devices::AnyDevice, + typename Device = Devices::Host, typename Index = int > struct LambdaMatrixFactory { using RealType = Real; using IndexType = Index; - + + /** + * \brief Creates lambda matrix with given lambda functions. + * + * \param matrixElementsLambda is a lambda function evaluating matrix elements. + * \param compressedRowLengthsLambda is a lambda function returning number of + * non-zero matrix elements in given \e row. + * \return instance of LambdaMatrix. 
+ * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp + * \par Output + * \include LambdaMatrixExample_Constructor.out + */ template< typename MatrixElementsLambda, typename CompressedRowLengthsLambda > static auto create( MatrixElementsLambda& matrixElementsLambda, @@ -143,7 +381,22 @@ struct LambdaMatrixFactory matrixElementsLambda, compressedRowLengthsLambda ); }; - + + /** + * \brief Creates lambda matrix with given dimensions and lambda functions. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param matrixElementsLambda is a lambda function evaluating matrix elements. + * \param compressedRowLengthsLambda is a lambda function returning number of + * non-zero matrix elements in given \e row. + * \return instance of LambdaMatrix. + * + * \par Example + * \include Matrices/LambdaMatrix/LambdaMatrixExample_Constructor.cpp + * \par Output + * \include LambdaMatrixExample_Constructor.out + */ template< typename MatrixElementsLambda, typename CompressedRowLengthsLambda > static auto create( const IndexType& rows, @@ -153,6 +406,7 @@ struct LambdaMatrixFactory -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index > { return LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >( + rows, columns, matrixElementsLambda, compressedRowLengthsLambda ); }; diff --git a/src/TNL/Matrices/LambdaMatrix.hpp b/src/TNL/Matrices/LambdaMatrix.hpp index c992bd575bd7f2e09bbd50ded60191a90ceca297..7e606d1e7f49991eaf7f09cb05c618c289f817eb 100644 --- a/src/TNL/Matrices/LambdaMatrix.hpp +++ b/src/TNL/Matrices/LambdaMatrix.hpp @@ -10,10 +10,13 @@ #pragma once +#include +#include #include #include #include #include +#include namespace TNL { namespace Matrices { @@ -94,33 +97,16 @@ void LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: getCompressedRowLengths( Vector& rowLengths ) const { - using Device_ 
= typename Devices::PickDevice< DeviceType >::DeviceType; - - rowLengths.setSize( this->getRows() ); - const IndexType rows = this->getRows(); - const IndexType columns = this->getColumns(); - auto rowLengthsView = rowLengths.getView(); - auto compressedRowLengths = this->compressedRowLengthsLambda; - - if( std::is_same< typename Vector::DeviceType, Device_ >::value ) - Algorithms::ParallelFor< Device_ >::exec( - ( IndexType ) 0, - this->getRows(), - [=] __cuda_callable__ ( const IndexType row ) mutable { - rowLengthsView[ row ] = compressedRowLengths( rows, columns, row ); - } ); - else - { - Containers::Vector< IndexType, Device_, IndexType > aux( this->getRows() ); - auto auxView = aux.getView(); - Algorithms::ParallelFor< Device_ >::exec( - ( IndexType ) 0, - this->getRows(), - [=] __cuda_callable__ ( const IndexType row ) mutable { - auxView[ row ] = compressedRowLengths( rows, columns, row ); - } ); - rowLengths = aux; - } + details::set_size_if_resizable( rowLengths, this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, std::plus<>{}, keep, 0 ); } template< typename MatrixElementsLambda, @@ -130,9 +116,9 @@ template< typename MatrixElementsLambda, typename Index > Index LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { - Containers::Vector< IndexType, typename Devices::PickDevice< DeviceType >::DeviceType, IndexType > rowLengthsVector; + Containers::Vector< IndexType, DeviceType, IndexType > rowLengthsVector; this->getCompressedRowLengths( rowLengthsVector ); return sum( rowLengthsVector ); } @@ -147,8 +133,7 
@@ LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, In getElement( const IndexType row, const IndexType column ) const { - using Device_ = typename Devices::PickDevice< Devices::Host >::DeviceType; - Containers::Array< RealType, Device_ > value( 1 ); + Containers::Array< RealType, DeviceType > value( 1 ); auto valueView = value.getView(); auto rowLengths = this->compressedRowLengthsLambda; auto matrixElements = this->matrixElementsLambda; @@ -169,25 +154,10 @@ getElement( const IndexType row, } } }; - Algorithms::ParallelFor< Device_ >::exec( row, row + 1, getValue ); + Algorithms::ParallelFor< DeviceType >::exec( row, row + 1, getValue ); return valueView.getElement( 0 ); } -template< typename MatrixElementsLambda, - typename CompressedRowLengthsLambda, - typename Real, - typename Device, - typename Index > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: -rowVectorProduct( const IndexType row, - const Vector& vector ) const -{ - -} - template< typename MatrixElementsLambda, typename CompressedRowLengthsLambda, typename Real, @@ -200,20 +170,22 @@ LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, In vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, - const RealType& outVectorMultiplicator ) const + const RealType& outVectorMultiplicator, + const IndexType begin, + IndexType end ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." 
); const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType columnIdx, const RealType& value ) mutable -> RealType { if( value == 0.0 ) return 0.0; return value * inVectorView[ columnIdx ]; }; - auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { - sum += value; + auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) -> RealType { + return sum + value; }; auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { if( outVectorMultiplicator == 0.0 ) @@ -221,7 +193,9 @@ vectorProduct( const InVector& inVector, else outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; }; - this->allRowsReduction( fetch, reduce, keep, 0.0 ); + if( ! end ) + end = this->getRows(); + this->rowsReduction( begin, end, fetch, reduce, keep, 0.0 ); } template< typename MatrixElementsLambda, @@ -232,10 +206,9 @@ template< typename MatrixElementsLambda, template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - using FetchType = decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ); - using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType; + using FetchType = decltype( fetch( IndexType(), IndexType(), RealType() ) ); const IndexType rows = this->getRows(); const IndexType columns = this->getColumns(); @@ -251,12 +224,12 @@ rowsReduction( IndexType 
first, IndexType last, Fetch& fetch, Reduce& reduce, Ke matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue ); FetchType fetchValue( zero ); if( elementValue != 0.0 ) - fetchValue = fetch( rowIdx, localIdx, elementColumn, elementValue ); - reduce( result, fetchValue ); + fetchValue = fetch( rowIdx, elementColumn, elementValue ); + result = reduce( result, fetchValue ); } keep( rowIdx, result ); }; - Algorithms::ParallelFor< Device_ >::exec( first, last, processRow ); + Algorithms::ParallelFor< DeviceType >::exec( first, last, processRow ); } template< typename MatrixElementsLambda, @@ -267,7 +240,7 @@ template< typename MatrixElementsLambda, template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: -allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } @@ -282,9 +255,6 @@ void LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: forRows( IndexType first, IndexType last, Function& function ) const { - using FetchType = decltype( fetch( IndexType(), IndexType(), RealType(), IndexType() ) ); - using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType; - const IndexType rows = this->getRows(); const IndexType columns = this->getColumns(); auto rowLengths = this->compressedRowLengthsLambda; @@ -301,7 +271,7 @@ forRows( IndexType first, IndexType last, Function& function ) const function( rowIdx, localIdx, elementColumn, elementValue, compute ); } }; - Algorithms::ParallelFor< Device_ >::exec( first, last, processRow ); + Algorithms::ParallelFor< DeviceType >::exec( first, last, processRow ); } template< typename MatrixElementsLambda, @@ -312,9 +282,25 @@ template< typename 
MatrixElementsLambda, template< typename Function > void LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: -forRows( IndexType first, IndexType last, Function& function ) +forAllRows( Function& function ) const { - this->forRows( 0, this->getRows(), function ); + const IndexType rows = this->getRows(); + const IndexType columns = this->getColumns(); + auto rowLengths = this->compressedRowLengthsLambda; + auto matrixElements = this->matrixElementsLambda; + auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + const IndexType rowLength = rowLengths( rows, columns, rowIdx ); + bool compute( true ); + for( IndexType localIdx = 0; localIdx < rowLength && compute; localIdx++ ) + { + IndexType elementColumn( 0 ); + RealType elementValue( 0.0 ); + matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue ); + if( elementValue != 0.0 ) + function( rowIdx, localIdx, elementColumn, elementValue, compute ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( 0, this->getRows(), processRow ); } template< typename MatrixElementsLambda, @@ -347,13 +333,35 @@ print( std::ostream& str ) const str <<"Row: " << row << " -> "; for( IndexType column = 0; column < this->getColumns(); column++ ) { - auto value = this->getElement( row, column ); - if( value != ( RealType ) 0 ) - str << " Col:" << column << "->" << value << "\t"; + RealType value = this->getElement( row, column ); + if( value ) + { + std::stringstream str_; + str_ << std::setw( 4 ) << std::right << column << ":" << std::setw( 4 ) << std::left << value; + str << std::setw( 10 ) << str_.str(); + } } str << std::endl; } } +/** + * \brief Insertion operator for lambda matrix and output stream. + * + * \param str is the output stream. + * \param matrix is the lambda matrix. + * \return reference to the stream. 
+ */ +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +std::ostream& operator<< ( std::ostream& str, const LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >& matrix ) +{ + matrix.print( str ); + return str; +} + } //namespace Matrices } //namespace TNL diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 4b954d314ab7775ec00b65ac31bea4c18684c4ff..210332bcd77603ee01000bf59e076460b123a34a 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -58,7 +58,7 @@ public: IndexType getAllocatedElementsCount() const; - IndexType getNumberOfNonzeroMatrixElements() const; + IndexType getNonzeroElementsCount() const; void reset(); diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index ce5f52274ec1134f30a52b64bf1572b7d757dc84..adacaee57f961db424cb85ef548dc7b1189f24fd 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -79,7 +79,7 @@ template< typename Real, typename Device, typename Index, typename RealAllocator > -Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const +Index Matrix< Real, Device, Index, RealAllocator >::getNonzeroElementsCount() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h index 1d825a769d1d2e78b944c7bf464353585d3188b9..65c2aca63a798c920da05958ab293054c0d1f7f2 100644 --- a/src/TNL/Matrices/MatrixInfo.h +++ b/src/TNL/Matrices/MatrixInfo.h @@ -34,6 +34,8 @@ template< typename Matrix > struct MatrixInfo {}; +/// This is to prevent from appearing in Doxygen documentation. 
+/// \cond HIDDEN_CLASS template< typename Real, typename Device, typename Index, @@ -151,5 +153,6 @@ struct MatrixInfo< Legacy::SlicedEllpack< Real, Device, Index, SliceSize> > static String getFormat() { return "SlicedEllpack Legacy"; }; }; +/// \endcond } //namespace Matrices } //namespace TNL diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h index ae0606678f1b9167b10fd4b9e4868847c41c9b99..c9960982ac79086a3b1fee2ea08dc438e1359d49 100644 --- a/src/TNL/Matrices/MatrixReader.h +++ b/src/TNL/Matrices/MatrixReader.h @@ -17,9 +17,12 @@ namespace TNL { namespace Matrices { +/// This is to prevent from appearing in Doxygen documentation. +/// \cond HIDDEN_CLASS template< typename Device > class MatrixReaderDeviceDependentCode {}; +/// \endcond template< typename Matrix > class MatrixReader diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index df2c05c63237c23e0fa26ba331ac8a3d2da03b8c..862d4a285cc3565f9c37309336fa333523d6c111 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -357,6 +357,8 @@ void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, value = ( RealType ) atof( parsedLine[ 2 ].getString() ); } +/// This is to prevent from appearing in Doxygen documentation. 
+/// \cond HIDDEN_CLASS template<> class MatrixReaderDeviceDependentCode< Devices::Host > { @@ -392,6 +394,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); } }; +/// \endcond } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h index 8d4cfe7ba4d592ca117005127660f800df287906..ad1faaa8ba8e665fcb81b6b37ecaa594b4df2608 100644 --- a/src/TNL/Matrices/MatrixType.h +++ b/src/TNL/Matrices/MatrixType.h @@ -11,8 +11,13 @@ #pragma once namespace TNL { - namespace Matrices { +namespace Matrices { +/** + * \brief Structure for specifying type of sparse matrix. + * + * It is used for specification of \ref SparseMatrix type. + */ template< bool Symmetric, bool Binary > struct MatrixType @@ -21,43 +26,65 @@ struct MatrixType static constexpr bool isBinary() { return Binary; } + static String getSerializationType() { + String type; + if( ! isBinary() && ! 
isSymmetric() ) + type = "General"; + else + { + if( isSymmetric ) type = "Symmetric"; + if( isBinary ) type += "Binary"; + } + return type; + } }; -struct GeneralMatrix -{ - static constexpr bool isSymmetric() { return false; } - - static constexpr bool isBinary() { return false; } -}; - -struct SymmetricMatrix -{ - static constexpr bool isSymmetric() { return true; } - - static constexpr bool isBinary() { return false; } -}; - -struct BinaryMatrix -{ - static constexpr bool isSymmetric() { return false; } - - static constexpr bool isBinary() { return true; } -}; - -struct BinarySymmetricMatrix -{ - static constexpr bool isSymmetric() { return false; } - - static constexpr bool isBinary() { return true; } -}; - -struct SymmetricBinaryMatrix -{ - static constexpr bool isSymmetric() { return false; } - - static constexpr bool isBinary() { return true; } -}; - - - } //namespace Matrices -} //namespace TNL \ No newline at end of file +/** + * \brief General non-symmetric matrix type. + * + * It is used for specification of \ref SparseMatrix type. + */ +struct GeneralMatrix : MatrixType< false, false > {}; + +/** + * \brief Symmetric matrix type. + * + * Symmetric matrix stores only lower part of the matrix and its diagonal. The + * upper part is reconstructed on the fly. + * It is used for specification of \ref SparseMatrix type. + */ +struct SymmetricMatrix : MatrixType< true, false > {}; + +/** + * \brief Binary matrix type. + * + * Binary matrix does not store explictly values of matrix elements and thus + * it reduces memory consumption. + * It is used for specification of \ref SparseMatrix type. + */ +struct BinaryMatrix : MatrixType< false, true > {}; + +/** + * \brief Symmetric and binary matrix type. + * + * Symmetric matrix stores only lower part of the matrix and its diagonal. The + * upper part is reconstructed on the fly. + * Binary matrix does not store explictly values of matrix elements and thus + * it reduces memory consumption. 
+ * It is used for specification of \ref SparseMatrix type. + */ +struct BinarySymmetricMatrix : MatrixType< true, true > {}; + +/** + * \brief Symmetric and binary matrix type. + * + * Symmetric matrix stores only lower part of the matrix and its diagonal. The + * upper part is reconstructed on the fly. + * Binary matrix does not store explictly values of matrix elements and thus + * it reduces memory consumption. + * It is used for specification of \ref SparseMatrix type. + */ +struct SymmetricBinaryMatrix : MatrixType< true, true > {}; + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 76a3948a98792388120097cc7e20190ba58c95e5..9c23e539f02cfb412db82b58d771c5923c57cf13 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -49,9 +49,12 @@ public: __cuda_callable__ MatrixView( const MatrixView& view ) = default; + __cuda_callable__ + MatrixView( MatrixView&& view ) = default; + IndexType getAllocatedElementsCount() const; - virtual IndexType getNumberOfNonzeroMatrixElements() const; + virtual IndexType getNonzeroElementsCount() const; __cuda_callable__ IndexType getRows() const; diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index b2b181e4c4671607728bfb9f37935a23fe258a30..e10874201eb587110238570b5978b42cdc0cf7f2 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -57,7 +57,7 @@ template< typename Real, typename Index > Index MatrixView< Real, Device, Index >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h deleted file mode 100644 index 9823a7b6198191cb601eaf8ea5403254fc5ff88a..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/Multidiagonal.h +++ 
/dev/null @@ -1,227 +0,0 @@ -/*************************************************************************** - Multidiagonal.h - description - ------------------- - begin : Oct 13, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace TNL { -namespace Matrices { - -template< typename Real = double, - typename Device = Devices::Host, - typename Index = int, - ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), - typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, - typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > -class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > -{ - public: - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - using RealAllocatorType = RealAllocator; - using IndexAllocatorType = IndexAllocator; - using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using ValuesVectorType = typename BaseType::ValuesVectorType; - using ValuesViewType = typename ValuesVectorType::ViewType; - using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >; - using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; - using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; - using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; - using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >; - using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; - - using HostDiagonalsShiftsType = Containers::Vector< IndexType, 
Devices::Host, IndexType >; - using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; - - - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Multidiagonal< _Real, _Device, _Index >; - - static constexpr ElementsOrganization getOrganization() { return Organization; }; - - Multidiagonal(); - - Multidiagonal( const IndexType rows, - const IndexType columns ); - - template< typename Vector > - Multidiagonal( const IndexType rows, - const IndexType columns, - const Vector& diagonalsShifts ); - - ViewType getView() const; // TODO: remove const - - //ConstViewType getConstView() const; - - static String getSerializationType(); - - virtual String getSerializationTypeVirtual() const; - - template< typename Vector > - void setDimensions( const IndexType rows, - const IndexType columns, - const Vector& diagonalsShifts ); - - //template< typename Vector > - void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); - - const IndexType& getDiagonalsCount() const; - - const DiagonalsShiftsType& getDiagonalsShifts() const; - - template< typename Vector > - void getCompressedRowLengths( Vector& rowLengths ) const; - - IndexType getNonemptyRowsCount() const; - - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; - - IndexType getMaxRowLength() const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - void setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ); - 
- IndexType getNumberOfNonzeroMatrixElements() const; - - void reset(); - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - bool operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - bool operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - - __cuda_callable__ - RowView getRow( const IndexType& rowIdx ); - - __cuda_callable__ - const RowView getRow( const IndexType& rowIdx ) const; - - void setValue( const RealType& v ); - - void setElement( const IndexType row, - const IndexType column, - const RealType& value ); - - void addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - RealType getElement( const IndexType row, - const IndexType column ) const; - - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); - - template< typename Function > - void forAllRows( Function& function ) const; - - template< typename Function > - void forAllRows( Function& function ); - - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - - template< typename 
InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - void addMatrix( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); - - template< typename Real2, typename Index2 > - void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); - - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; - - // copy assignment - Multidiagonal& operator=( const Multidiagonal& matrix ); - - // cross-device copy assignment - template< typename Real_, - typename Device_, - typename Index_, - ElementsOrganization Organization_, - typename RealAllocator_, - typename IndexAllocator_ > - Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ); - - void save( File& file ) const; - - void load( File& file ); - - void save( const String& fileName ) const; - - void load( const String& fileName ); - - void print( std::ostream& str ) const; - - const IndexerType& getIndexer() const; - - IndexerType& getIndexer(); - - __cuda_callable__ - IndexType getPaddingIndex() const; - - protected: - - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType localIdx ) const; - - DiagonalsShiftsType diagonalsShifts; - - HostDiagonalsShiftsType hostDiagonalsShifts; - - IndexerType indexer; - - ViewType view; -}; - -} // namespace Matrices -} // namespace TNL - -#include diff --git a/src/TNL/Matrices/MultidiagonalMatrix.h b/src/TNL/Matrices/MultidiagonalMatrix.h new file mode 100644 index 
0000000000000000000000000000000000000000..74cd7a4fe3748180c8f2134dfae145f09910ff11 --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrix.h @@ -0,0 +1,882 @@ +/*************************************************************************** + Multidiagonal.h - description + ------------------- + begin : Oct 13, 2011 + copyright : (C) 2011 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +/** + * \brief Implementation of sparse multidiagonal matrix. + * + * Use this matrix type for storing of matrices where the offsets of non-zero elements + * from the diagonal are the same in each row. Typically such matrices arise from + * discretization of partial differential equations on regular numerical grids. This is + * one example (dots represent zero matrix elements): + * + * \f[ + * \left( + * \begin{array}{ccccccc} + * 4 & -1 & . & -1 & . & . \\ + * -1 & 4 & -1 & . & -1 & . \\ + * . & -1 & 4 & -1 & . & -1 \\ + * -1 & . & -1 & 4 & -1 & . \\ + * . & -1 & . & -1 & 4 & -1 \\ + * . & . & -1 & . & -1 & 4 + * \end{array} + * \right) + * \f] + * + * In this matrix, the column indexes in each row \f$i\f$ can be expressed as + * \f$\{i-3, i-1, i, i+1, i+3\}\f$ (where the resulting index is non-negative and + * smaller than the number of matrix columns). Therefore the diagonals offsets + * are \f$\{-3,-1,0,1,3\}\f$. Advantage is that we do not store the column indexes + * explicitly as it is in \ref SparseMatrix. This can reduce significantly the + * memory requirements which also means better performance. See the following table + * for the storage requirements comparison between \ref MultidiagonalMatrix and \ref SparseMatrix. 
+ * + * Data types | SparseMatrix | MultidiagonalMatrix | Ratio + * --------------------|----------------------|---------------------|-------- + * float + 32-bit int | 8 bytes per element | 4 bytes per element | 50% + * double + 32-bit int| 12 bytes per element | 8 bytes per element | 75% + * float + 64-bit int | 12 bytes per element | 4 bytes per element | 30% + * double + 64-bit int| 16 bytes per element | 8 bytes per element | 50% + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. + * \tparam RealAllocator is allocator for the matrix elements. + * \tparam IndexAllocator is allocator for the matrix elements offsets. + */ +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class MultidiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator > +{ + public: + + // Supporting types - they are not important for the user + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using IndexerType = details::MultidiagonalMatrixIndexer< Index, Organization >; + using DiagonalsOffsetsType = Containers::Vector< Index, Device, Index, IndexAllocator >; + using DiagonalsOffsetsView = typename DiagonalsOffsetsType::ViewType; + using HostDiagonalsOffsetsType = Containers::Vector< Index, Devices::Host, Index >; + using HostDiagonalsOffsetsView = 
typename HostDiagonalsOffsetsType::ViewType; + + /** + * \brief The type of matrix elements. + */ + using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ + using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ + using IndexType = Index; + + /** + * \brief The allocator for matrix elements values. + */ + using RealAllocatorType = RealAllocator; + + /** + * \brief The allocator for matrix elements offsets from the diagonal. + */ + using IndexAllocatorType = IndexAllocator; + + /** + * \brief Type of related matrix view. + * + * See \ref MultidiagonalMatrixView. + */ + using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >; + + /** + * \brief Matrix view type for constant instances. + * + * See \ref MultidiagonalMatrixView. + */ + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; + + /** + * \brief Type for accessing matrix rows. + */ + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >; + + /** + * \brief Type for accessing constant matrix rows. + */ + using ConstRowView = typename RowView::ConstViewType; + + /** + * \brief Helper type for getting self type or its modifications. + */ + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + ElementsOrganization _Organization = Organization, + typename _RealAllocator = RealAllocator, + typename _IndexAllocator = IndexAllocator > + using Self = MultidiagonalMatrix< _Real, _Device, _Index, _Organization, _RealAllocator, _IndexAllocator >; + + /** + * \brief Elements organization getter. + */ + static constexpr ElementsOrganization getOrganization() { return Organization; }; + + /** + * \brief Constructor with no parameters. + */ + MultidiagonalMatrix(); + + /** + * \brief Constructor with matrix dimensions. + * + * \param rows is number of matrix rows. 
+ * \param columns is number of matrix columns. + */ + MultidiagonalMatrix( const IndexType rows, + const IndexType columns ); + + /** + * \brief Constructor with matrix dimensions and matrix elements offsets. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param diagonalsOffsets are offsets of subdiagonals from the main diagonal. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor.cpp + * \par Output + * \include MultidiagonalMatrixExample_Constructor.out + */ + template< typename Vector > + MultidiagonalMatrix( const IndexType rows, + const IndexType columns, + const Vector& diagonalsOffsets ); + + /** + * \brief Constructor with matrix dimensions and diagonals offsets. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param diagonalsOffsets are offsets of sub-diagonals from the main diagonal. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_1.cpp + * \par Output + * \include MultidiagonalMatrixExample_Constructor_init_list_1.out + */ + template< typename ListIndex > + MultidiagonalMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< ListIndex > diagonalsOffsets ); + + /** + * \brief Constructor with matrix dimensions, diagonals offsets and matrix elements. + * + * The number of matrix rows is deduced from the size of the initializer list \e data. + * + * \tparam ListIndex is type used in the initializer list defining matrix diagonals offsets. + * \tparam ListReal is type used in the initializer list defining matrix elements values. + * + * \param columns is number of matrix columns. + * \param diagonalOffsets are offsets of sub-diagonals from the main diagonal. + * \param data is initializer list holding matrix elements. The size of the outer list + * defines the number of matrix rows. 
Each inner list defines values of each sub-diagonal + * and so its size should be lower or equal to the size of \e diagonalsOffsets. Values + * of sub-diagonals which do not fit to given row are omitted. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_Constructor_init_list_2.cpp + * \par Output + * \include MultidiagonalMatrixExample_Constructor_init_list_2.out + */ + template< typename ListIndex, typename ListReal > + MultidiagonalMatrix( const IndexType columns, + const std::initializer_list< ListIndex > diagonalsOffsets, + const std::initializer_list< std::initializer_list< ListReal > >& data ); + + /** + * \brief Copy constructor. + * + * \param matrix is an input matrix. + */ + MultidiagonalMatrix( const MultidiagonalMatrix& matrix ) = default; + + /** + * \brief Move constructor. + * + * \param matrix is an input matrix. + */ + MultidiagonalMatrix( MultidiagonalMatrix&& matrix ) = default; + + /** + * \brief Returns a modifiable view of the mutlidiagonal matrix. + * + * See \ref MultidiagonalMatrixView. + * + * \return multidiagonal matrix view. + */ + ViewType getView() const; // TODO: remove const + + /** + * \brief Returns a non-modifiable view of the multidiagonal matrix. + * + * See \ref MultidiagonalMatrixView. + * + * \return multidiagonal matrix view. + */ + ConstViewType getConstView() const; + + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::MultidiagonalMatrix< RealType, [any_device], IndexType, ElementsOrganization, [any_allocator], [any_allocator] >`. + * + * \return \ref String with the serialization type. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp + * \par Output + * \include MultidiagonalMatrixExample_getSerializationType.out + */ + static String getSerializationType(); + + /** + * \brief Returns string with serialization type. 
+ * + * See \ref MultidiagonalMatrix::getSerializationType. + * + * \return \e String with the serialization type. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getSerializationType.cpp + * \par Output + * \include MultidiagonalMatrixExample_getSerializationType.out + */ + virtual String getSerializationTypeVirtual() const; + + /** + * \brief Set matrix dimensions and diagonals offsets. + * + * \tparam Vector is type of vector like container holding the diagonals offsets. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param diagonalsOffsets is vector with diagonals offsets. + */ + template< typename Vector > + void setDimensions( const IndexType rows, + const IndexType columns, + const Vector& diagonalsOffsets ); + + /** + * \brief This method is for compatibility with \ref SparseMatrix. + * + * It checks if the number of matrix diagonals is compatible with + * required number of non-zero matrix elements in each row. If not + * exception is thrown. + * + * \tparam RowCapacitiesVector is vector-like container type for holding required + * row capacities. + * + * \param rowCapacities is vector-like container holding required row capacities. + */ + template< typename RowCapacitiesVector > + void setRowCapacities( const RowCapacitiesVector& rowCapacities ); + + /** + * \brief Set matrix elements from an initializer list. + * + * \tparam ListReal is data type of the initializer list. + * + * \param data is initializer list holding matrix elements. The size of the outer list + * defines the number of matrix rows. Each inner list defines values of each sub-diagonal + * and so its size should be lower or equal to the size of \e diagonalsOffsets. Values + * of sub-diagonals which do not fit to given row are omitted. 
+ * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElements.cpp + * \par Output + * \include MultidiagonalMatrixExample_setElements.out + */ + template< typename ListReal > + void setElements( const std::initializer_list< std::initializer_list< ListReal > >& data ); + + /** + * \brief Returns number of diagonals. + * + * \return Number of diagonals. + */ + const IndexType& getDiagonalsCount() const; + + /** + * \brief Returns vector with diagonals offsets. + * + * \return vector with diagonals offsets. + */ + const DiagonalsOffsetsType& getDiagonalsOffsets() const; + + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getCompressedRowLengths.cpp + * \par Output + * \include MultidiagonalMatrixExample_getCompressedRowLengths.out + */ + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + /** + * \brief Setup the matrix dimensions and diagonals offsets based on another multidiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix. + * + * \param matrix is the source matrix. 
+ */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_, + typename IndexAllocator_ > + void setLike( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ); + + /** + * \brief Returns number of non-zero matrix elements. + * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Resets the matrix to zero dimensions. + */ + void reset(); + + /** + * \brief Comparison operator with another multidiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix. + * + * \return \e true if both matrices are identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_, + typename IndexAllocator_ > + bool operator == ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const; + + /** + * \brief Comparison operator with another multidiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * \tparam IndexAllocator_ is \e IndexAllocator of the source matrix. 
+ * + * \param matrix is the source matrix. + * + * \return \e true if both matrices are NOT identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_, + typename IndexAllocator_ > + bool operator != ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const; + + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getRow.cpp + * \par Output + * \include MultidiagonalMatrixExample_getRow.out + * + * See \ref MultidiagonalMatrixRowView. + */ + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getConstRow.cpp + * \par Output + * \include MultidiagonalMatrixExample_getConstRow.out + * + * See \ref MultidiagonalMatrixRowView. + */ + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + /** + * \brief Set all matrix elements to given value. + * + * \param value is the new value of all matrix elements. + */ + void setValue( const RealType& value ); + + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. 
If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_setElement.cpp + * \par Output + * \include MultidiagonalMatrixExample_setElement.out + */ + __cuda_callable__ + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + /** + * \brief Add element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. + * \param thisElementMultiplicator is multiplicator the original matrix element + * value is multiplied by before addition of given \e value. 
+ * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_addElement.cpp + * \par Output + * \include MultidiagonalMatrixExample_addElement.out + * + */ + __cuda_callable__ + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column i a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_getElement.cpp + * \par Output + * \include MultidiagonalMatrixExample_getElement.out + * + */ + __cuda_callable__ + RealType getElement( const IndexType row, + const IndexType column ) const; + + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. 
+ * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. 
+ * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_rowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_allRowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for iteration over matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. 
+ * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp + * \par Output + * \include MultidiagonalMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ) const; + + /** + * \brief Method for iteration over matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forRows.cpp + * \par Output + * \include MultidiagonalMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ); + + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances). 
+ * + * See \ref MultidiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp + * \par Output + * \include MultidiagonalMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ) const; + + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref MultidiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixExample_forAllRows.cpp + * \par Output + * \include MultidiagonalMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ); + + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArraView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArraView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. 
+ * \param end is the end of the rows range for which the vector product + * is computed. It is number if the matrix rows by default. + */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0, + const IndexType begin = 0, + IndexType end = 0 ) const; + + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void addMatrix( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const MultidiagonalMatrix< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + /** + * \brief Assignment of exactly the same matrix type. + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. + */ + MultidiagonalMatrix& operator=( const MultidiagonalMatrix& matrix ); + + /** + * \brief Assignment of another multidiagonal matrix + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_, + typename IndexAllocator_ > + MultidiagonalMatrix& operator=( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ); + + /** + * \brief Method for saving the matrix to a file. + * + * \param file is the output file. 
+ */ + void save( File& file ) const; + + /** + * \brief Method for loading the matrix from a file. + * + * \param file is the input file. + */ + void load( File& file ); + + /** + * \brief Method for saving the matrix to the file with given filename. + * + * \param fileName is name of the file. + */ + void save( const String& fileName ) const; + + /** + * \brief Method for loading the matrix from the file with given filename. + * + * \param fileName is name of the file. + */ + void load( const String& fileName ); + + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. + */ + void print( std::ostream& str ) const; + + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return constant reference to the indexer. + */ + const IndexerType& getIndexer() const; + + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return non-constant reference to the indexer. + */ + IndexerType& getIndexer(); + + /** + * \brief Returns padding index denoting padding zero elements. + * + * These elements are used for efficient data alignment in memory. + * + * \return value of the padding index. 
+ */ + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + DiagonalsOffsetsType diagonalsOffsets; + + HostDiagonalsOffsetsType hostDiagonalsOffsets; + + IndexerType indexer; + + ViewType view; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/MultidiagonalMatrix.hpp similarity index 63% rename from src/TNL/Matrices/Multidiagonal.hpp rename to src/TNL/Matrices/MultidiagonalMatrix.hpp index be6ec58c50e4c967a7ec05b466fc8ec44f296fe0..e50782d9a6dcb7bb311aa497e805481eec5171ed 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrix.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Multidiagonal.hpp - description + MultidiagonalMatrix.hpp - description ------------------- begin : Oct 13, 2011 copyright : (C) 2011 by Tomas Oberhuber @@ -12,14 +12,14 @@ #include #include -#include +#include #include namespace TNL { namespace Matrices { template< typename Device > -class MultidiagonalDeviceDependentCode; +class MultidiagonalMatrixDeviceDependentCode; template< typename Real, typename Device, @@ -27,8 +27,8 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -Multidiagonal() +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix() { } @@ -39,13 +39,13 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > template< typename Vector > -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -Multidiagonal( const IndexType rows, +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix( const IndexType rows, const IndexType columns, - const Vector& diagonalsShifts ) + const Vector& 
diagonalsOffsets ) { - TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." ); - this->setDimensions( rows, columns, diagonalsShifts ); + TNL_ASSERT_GT( diagonalsOffsets.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." ); + this->setDimensions( rows, columns, diagonalsOffsets ); } template< typename Real, @@ -54,29 +54,51 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getView() const -> ViewType + template< typename ListIndex > +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< ListIndex > diagonalsOffsets ) { - // TODO: fix when getConstView works - return ViewType( const_cast< Multidiagonal* >( this )->values.getView(), - const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(), - const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(), - indexer ); + Containers::Vector< IndexType, DeviceType, IndexType > shifts( diagonalsOffsets ); + TNL_ASSERT_GT( shifts.getSize(), 0, "Cannot construct multidiagonal matrix with no diagonals shifts." 
); + this->setDimensions( rows, columns, shifts ); } -/*template< typename Real, +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator, + typename IndexAllocator > + template< typename ListIndex, typename ListReal > +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix( const IndexType columns, + const std::initializer_list< ListIndex > diagonalsOffsets, + const std::initializer_list< std::initializer_list< ListReal > >& data ) +{ + Containers::Vector< IndexType, DeviceType, IndexType > shifts( diagonalsOffsets ); + TNL_ASSERT_GT( shifts.getSize(), 0, "Cannot construct multidiagonal matrix with no diagonals shifts." ); + this->setDimensions( data.size(), columns, shifts ); + this->setElements( data ); +} + +template< typename Real, typename Device, typename Index, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getConstView() const -> ConstViewType +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +getView() const -> ViewType { - return ConstViewType( this->values.getConstView(), indexer ); -}*/ + // TODO: fix when getConstView works + return ViewType( const_cast< MultidiagonalMatrix* >( this )->values.getView(), + const_cast< MultidiagonalMatrix* >( this )->diagonalsOffsets.getView(), + const_cast< MultidiagonalMatrix* >( this )->hostDiagonalsOffsets.getView(), + indexer ); +} template< typename Real, typename Device, @@ -85,13 +107,10 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > String -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getSerializationType() { - return String( "Matrices::Multidiagonal< " ) + - 
TNL::getSerializationType< RealType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + ", " + - ( Organization ? "true" : "false" ) + ", [any_allocator], [any_allocator] >"; + return ViewType::getSerializationType(); } template< typename Real, @@ -101,7 +120,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > String -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -115,19 +134,19 @@ template< typename Real, typename IndexAllocator > template< typename Vector > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setDimensions( const IndexType rows, const IndexType columns, - const Vector& diagonalsShifts ) + const Vector& diagonalsOffsets ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); - this->diagonalsShifts = diagonalsShifts; - this->hostDiagonalsShifts = diagonalsShifts; - const IndexType minShift = min( diagonalsShifts ); + this->diagonalsOffsets = diagonalsOffsets; + this->hostDiagonalsOffsets = diagonalsOffsets; + const IndexType minOffset = min( diagonalsOffsets ); IndexType nonemptyRows = min( rows, columns ); - if( rows > columns && minShift < 0 ) - nonemptyRows = min( rows, nonemptyRows - minShift ); - this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows ); + if( rows > columns && minOffset < 0 ) + nonemptyRows = min( rows, nonemptyRows - minOffset ); + this->indexer.set( rows, columns, diagonalsOffsets.getSize(), nonemptyRows ); this->values.setSize( this->indexer.getStorageSize() ); this->values = 0.0; this->view = this->getView(); @@ -139,10 +158,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, 
typename IndexAllocator > - // template< typename Vector > + template< typename RowCapacitiesVector > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +setRowCapacities( const RowCapacitiesVector& rowLengths ) { if( max( rowLengths ) > 3 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); @@ -166,11 +185,33 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -const Index& -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getDiagonalsCount() const + template< typename ListReal > +void +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +setElements( const std::initializer_list< std::initializer_list< ListReal > >& data ) { - return this->view.getDiagonalsCount(); + if( std::is_same< DeviceType, Devices::Host >::value ) + { + this->getValues() = 0.0; + auto row_it = data.begin(); + for( size_t rowIdx = 0; rowIdx < data.size(); rowIdx++ ) + { + auto data_it = row_it->begin(); + IndexType i = 0; + while( data_it != row_it->end() ) + this->getRow( rowIdx ).setElement( i++, *data_it++ ); + row_it ++; + } + } + else + { + MultidiagonalMatrix< Real, Devices::Host, Index, Organization > hostMatrix( + this->getRows(), + this->getColumns(), + this->getDiagonalsOffsets() ); + hostMatrix.setElements( data ); + *this = hostMatrix; + } } template< typename Real, @@ -179,11 +220,11 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getDiagonalsShifts() const -> const DiagonalsShiftsType& +const Index& +MultidiagonalMatrix< Real, Device, Index, 
Organization, RealAllocator, IndexAllocator >:: +getDiagonalsCount() const { - return this->diagonalsShifts; + return this->view.getDiagonalsCount(); } template< typename Real, @@ -192,12 +233,11 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Vector > -void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getCompressedRowLengths( Vector& rowLengths ) const +auto +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +getDiagonalsOffsets() const -> const DiagonalsOffsetsType& { - return this->view.getCompressedRowLengths( rowLengths ); + return this->diagonalsOffsets; } template< typename Real, @@ -206,11 +246,12 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Index -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getNonemptyRowsCount() const + template< typename Vector > +void +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const { - return this->indexer.getNonemptyRowsCount(); + return this->view.getCompressedRowLengths( rowLengths ); } template< typename Real, @@ -220,7 +261,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getRowLength( const IndexType row ) const { return this->view.getRowLength( row ); @@ -232,25 +273,12 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Index -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getMaxRowLength() const -{ - return this->view.getMaxRowLength(); -} - -template< 
typename Real, - typename Device, - typename Index, - ElementsOrganization Organization, - typename RealAllocator, - typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +setLike( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) { - this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() ); + this->setDimensions( matrix.getRows(), matrix.getColumns(), matrix.getDiagonalsOffsets() ); } template< typename Real, @@ -260,10 +288,10 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getNumberOfNonzeroMatrixElements() const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +getNonzeroElementsCount() const { - return this->view.getNumberOfNonzeroMatrixElements(); + return this->view.getNonzeroElementsCount(); } template< typename Real, @@ -273,7 +301,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -285,16 +313,16 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename 
Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ > bool -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator == ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const { if( Organization == Organization_ ) return this->values == matrix.values; else { - TNL_ASSERT( false, "TODO" ); + TNL_ASSERT_TRUE( false, "TODO" ); } } @@ -304,10 +332,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ > bool -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator != ( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) const { return ! 
this->operator==( matrix ); } @@ -319,7 +347,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setValue( const RealType& v ) { this->view.setValue( v ); @@ -333,7 +361,7 @@ template< typename Real, typename IndexAllocator > __cuda_callable__ auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); @@ -347,7 +375,7 @@ template< typename Real, typename IndexAllocator > __cuda_callable__ auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); @@ -359,8 +387,9 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > +__cuda_callable__ void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { this->view.setElement( row, column, value ); @@ -372,8 +401,9 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > +__cuda_callable__ void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -388,8 +418,9 @@ template< typename Real, 
ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > +__cuda_callable__ Real -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->view.getElement( row, column ); @@ -403,7 +434,7 @@ template< typename Real, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); @@ -417,7 +448,21 @@ template< typename Real, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); @@ -429,12 +474,12 @@ template< typename Real, 
ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Function > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -forRows( IndexType first, IndexType last, Function& function ) const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) { - this->view.forRows( first, last, function ); + this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -443,10 +488,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Function > + template< typename Function > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -forRows( IndexType first, IndexType last, Function& function ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); } @@ -457,12 +502,12 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Function > + template< typename Function > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -forAllRows( Function& function ) const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) { - this->view.forRows( 0, this->getRows(), function ); + this->view.forRows( first, last, function ); } template< typename Real, @@ -473,8 +518,8 @@ template< typename Real, typename IndexAllocator > template< typename Function > void -Multidiagonal< Real, Device, 
Index, Organization, RealAllocator, IndexAllocator >:: -forAllRows( Function& function ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) const { this->view.forRows( 0, this->getRows(), function ); } @@ -485,13 +530,12 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -template< typename Vector > -__cuda_callable__ -typename Vector::RealType -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const + template< typename Function > +void +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) { - return this->view.rowVectorProduct(); + this->view.forRows( 0, this->getRows(), function ); } template< typename Real, @@ -503,10 +547,16 @@ template< typename Real, template< typename InVector, typename OutVector > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator, + const IndexType firstRow, + IndexType lastRow ) const { - this->view.vectorProduct( inVector, outVector ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, + outVectorMultiplicator, firstRow, lastRow ); } template< typename Real, @@ -517,8 +567,8 @@ template< typename Real, typename IndexAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -addMatrix( const Multidiagonal< Real_, 
Device_, Index_, Organization_, RealAllocator_ >& matrix, +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +addMatrix( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -530,8 +580,8 @@ template< typename Real, typename Real2, typename Index, typename Index2 > -__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, - Multidiagonal< Real, Devices::Cuda, Index >* outMatrix, +__global__ void MultidiagonalMatrixTranspositionCudaKernel( const MultidiagonalMatrix< Real2, Devices::Cuda, Index2 >* inMatrix, + MultidiagonalMatrix< Real, Devices::Cuda, Index >* outMatrix, const Real matrixMultiplicator, const Index gridIdx ) { @@ -560,8 +610,9 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > template< typename Real2, typename Index2 > -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator ) +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +getTransposition( const MultidiagonalMatrix< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), std::cerr << "This matrix rows: " << this->getRows() << std::endl @@ -580,8 +631,8 @@ void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAlloc if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Multidiagonal* kernel_this = Cuda::passToDevice( *this ); - typedef Multidiagonal< Real2, Device, Index2 > InMatrixType; + MultidiagonalMatrix* kernel_this = Cuda::passToDevice( *this ); + typedef MultidiagonalMatrix< Real2, Device, Index2 > InMatrixType; InMatrixType* kernel_inMatrix = 
Cuda::passToDevice( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); @@ -590,7 +641,7 @@ void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAlloc { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + MultidiagonalMatrixTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_inMatrix, kernel_this, matrixMultiplicator, @@ -611,10 +662,11 @@ template< typename Real, typename IndexAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega ) const +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const { RealType sum( 0.0 ); if( row > 0 ) @@ -632,8 +684,8 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::operator=( const MultidiagonalMatrix& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -648,11 +700,11 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, 
typename IndexAllocator_ > -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator=( const MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) { - using RHSMatrix = Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >; + using RHSMatrix = MultidiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; @@ -674,7 +726,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAlloc } else { - const IndexType maxRowLength = this->diagonalsShifts.getSize(); + const IndexType maxRowLength = this->diagonalsOffsets.getSize(); const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); @@ -722,10 +774,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( File& file ) const +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); - file << diagonalsShifts; + file << diagonalsOffsets; } template< typename Real, @@ -734,16 +786,16 @@ template< typename Real, 
ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( File& file ) +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); - file >> this->diagonalsShifts; - this->hostDiagonalsShifts = this->diagonalsShifts; - const IndexType minShift = min( diagonalsShifts ); + file >> this->diagonalsOffsets; + this->hostDiagonalsOffsets = this->diagonalsOffsets; + const IndexType minOffset = min( diagonalsOffsets ); IndexType nonemptyRows = min( this->getRows(), this->getColumns() ); - if( this->getRows() > this->getColumns() && minShift < 0 ) - nonemptyRows = min( this->getRows(), nonemptyRows - minShift ); - this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows ); + if( this->getRows() > this->getColumns() && minOffset < 0 ) + nonemptyRows = min( this->getRows(), nonemptyRows - minOffset ); + this->indexer.set( this->getRows(), this->getColumns(), diagonalsOffsets.getSize(), nonemptyRows ); this->view = this->getView(); } @@ -753,7 +805,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( const String& fileName ) const +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( const String& fileName ) const { Object::save( fileName ); } @@ -764,7 +816,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( const String& fileName ) +void MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( const String& fileName ) { 
Object::load( fileName ); } @@ -776,7 +828,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { this->view.print( str ); @@ -789,7 +841,7 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -802,32 +854,12 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getIndexer() -> IndexerType& { return this->indexer; } -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -Index Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: -getElementIndex( const IndexType row, const IndexType column ) const -{ - IndexType localIdx = column - row; - if( row > 0 ) - localIdx++; - - TNL_ASSERT_GE( localIdx, 0, "" ); - TNL_ASSERT_LT( localIdx, 3, "" ); - - return this->indexer.getGlobalIndex( row, localIdx ); -} - template< typename Real, typename Device, typename Index, @@ -836,115 +868,11 @@ template< typename Real, typename IndexAllocator > __cuda_callable__ Index -Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +MultidiagonalMatrix< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); } -/* -template<> -class 
MultidiagonalDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return 2*row + column; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ 1 ]; - Index i = 3 * row; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ]; - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ] + - vector[ row + 1 ] * values[ i + 1 ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class MultidiagonalDeviceDependentCode< Devices::Cuda > -{ - public: - - typedef Devices::Cuda Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return ( column - row + 1 )*rows + row - 1; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ rows - 1 ]; - Index i = row - 1; - 
if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ]; - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ] + - vector[ row + 1 ] * values[ i + 2*rows ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; - */ - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h index 0825d6fb365ebd6552ee033d41a1fe208219a14e..181974b72cb491174c0a51a157ddc35c952c2da3 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixRowView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h @@ -13,37 +13,139 @@ namespace TNL { namespace Matrices { +/** + * \brief RowView is a simple structure for accessing rows of multidiagonal matrix. + * + * \tparam ValuesView is a vector view storing the matrix elements values. + * \tparam Indexer is type of object responsible for indexing and organization of + * matrix elements. + * \tparam DiagonalsOffsetsView_ is a container view holding offsets of + * diagonals of multidiagonal matrix. + * + * See \ref MultidiagonalMatrix and \ref MultidiagonalMatrixView. + * + * \par Example + * \include Matrices/MultidiagonalMatrixExample_getRow.cpp + * \par Output + * \include MultidiagonalMatrixExample_getRow.out + * + * \par Example + * \include Matrices/MultidiagonalMatrixViewExample_getRow.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_getRow.out + */ template< typename ValuesView, typename Indexer, - typename DiagonalsShiftsView_ > + typename DiagonalsOffsetsView_ > class MultidiagonalMatrixRowView { public: + /** + * \brief The type of matrix elements.
+ */ using RealType = typename ValuesView::RealType; + + /** + * \brief The type used for matrix elements indexing. + */ using IndexType = typename ValuesView::IndexType; + + /** + * \brief Type of container view used for storing the matrix elements values. + */ using ValuesViewType = ValuesView; + + /** + * \brief Type of object responsible for indexing and organization of + * matrix elements. + */ using IndexerType = Indexer; - using DiagonalsShiftsView = DiagonalsShiftsView_; + /** + * \brief Type of a container view holding offsets of + * diagonals of multidiagonal matrix. + */ + using DiagonalsOffsetsView = DiagonalsOffsetsView_; + + /** + * \brief Type of constant container view used for storing the matrix elements values. + */ + using ConstValuesViewType = typename ValuesViewType::ConstViewType; + + /** + * \brief Type of constant container view holding offsets of diagonals of multidiagonal matrix. + */ + using ConstDiagonalsOffsetsViewType = typename DiagonalsOffsetsView::ConstViewType; + + /** + * \brief Type of constant indexer view. + */ + using ConstIndexerViewType = typename Indexer::ConstType; + + /** + * \brief Type of constant multidiagonal matrix row view. + */ + using ConstViewType = MultidiagonalMatrixRowView< ConstValuesViewType, ConstIndexerViewType, ConstDiagonalsOffsetsViewType >; + + /** + * \brief Constructor with all necessary data. + * + * \param rowIdx is index of the matrix row this RowView refers to. + * \param diagonalsOffsets is a vector view holding offsets of matrix diagonals. + * \param values is a vector view holding values of matrix elements.
+ * \param indexer is object responsible for indexing and organization of matrix elements + */ __cuda_callable__ MultidiagonalMatrixRowView( const IndexType rowIdx, - const DiagonalsShiftsView& diagonalsShifts, + const DiagonalsOffsetsView& diagonalsOffsets, const ValuesViewType& values, - const IndexerType& indexer); + const IndexerType& indexer ); + /** + * \brief Returns number of diagonals of the multidiagonal matrix. + * + * \return number of diagonals of the multidiagonal matrix. + */ __cuda_callable__ IndexType getSize() const; + /** + * \brief Computes column index of matrix element on given subdiagonal. + * + * \param localIdx is an index of the subdiagonal. + * + * \return column index of matrix element on given subdiagonal. + */ __cuda_callable__ const IndexType getColumnIndex( const IndexType localIdx ) const; + /** + * \brief Returns value of matrix element on given subdiagonal. + * + * \param localIdx is an index of the subdiagonal. + * + * \return constant reference to matrix element value. + */ __cuda_callable__ const RealType& getValue( const IndexType localIdx ) const; + /** + * \brief Returns value of matrix element on given subdiagonal. + * + * \param localIdx is an index of the subdiagonal. + * + * \return non-constant reference to matrix element value. + */ __cuda_callable__ RealType& getValue( const IndexType localIdx ); + /** + * \brief Changes value of matrix element on given subdiagonal. + * + * \param localIdx is an index of the matrix subdiagonal. + * \param value is the new value of the matrix element. 
+ */ __cuda_callable__ void setElement( const IndexType localIdx, const RealType& value ); @@ -51,7 +153,7 @@ class MultidiagonalMatrixRowView IndexType rowIdx; - DiagonalsShiftsView diagonalsShifts; + DiagonalsOffsetsView diagonalsOffsets; ValuesViewType values; diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp index 855b8463aa13eb5d21bee65923704d2be1d897ba..37cdd455a9838ac5e67ded80009a44bf4dd05796 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp @@ -13,59 +13,59 @@ namespace TNL { namespace Matrices { -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView > __cuda_callable__ -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: MultidiagonalMatrixRowView( const IndexType rowIdx, - const DiagonalsShiftsView& diagonalsShifts, + const DiagonalsOffsetsView& diagonalsOffsets, const ValuesViewType& values, const IndexerType& indexer ) -: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer ) +: rowIdx( rowIdx ), diagonalsOffsets( diagonalsOffsets ), values( values ), indexer( indexer ) { } -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: getSize() const -> IndexType { - return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx ); + return diagonalsOffsets.getSize();//indexer.getRowSize( rowIdx ); } -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, 
typename DiagonalsOffsetsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: getColumnIndex( const IndexType localIdx ) const -> const IndexType { TNL_ASSERT_GE( localIdx, 0, "" ); TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" ); - return rowIdx + diagonalsShifts[ localIdx ]; + return rowIdx + diagonalsOffsets[ localIdx ]; } -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: getValue( const IndexType localIdx ) const -> const RealType& { return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; } -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: getValue( const IndexType localIdx ) -> RealType& { return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; } -template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +template< typename ValuesView, typename Indexer, typename DiagonalsOffsetsView > __cuda_callable__ void -MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsOffsetsView >:: setElement( const IndexType localIdx, const RealType& value ) { diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index a3ebfe375d818a363acb6da1be90393dde63b672..2319570252d1f19167e39d749527036ee0cf6e91 100644 --- 
a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -19,6 +19,20 @@ namespace TNL { namespace Matrices { +/** + * \brief View of a sparse multidiagonal matrix. + * + * It serves as an accessor to \ref MultidiagonalMatrix for example when passing the + * matrix to lambda functions. MultidiagonalMatrix view can also be created in CUDA kernels. + * + * See \ref MultidiagonalMatrix for more details. + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. + */ template< typename Real = double, typename Device = Devices::Host, typename Index = int, @@ -26,116 +40,522 @@ template< typename Real = double, class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > { public: + + // Supporting types - they are not important for the user + using BaseType = MatrixView< Real, Device, Index >; + using ValuesViewType = typename BaseType::ValuesView; + using IndexerType = details::MultidiagonalMatrixIndexer< Index, Organization >; + using DiagonalsOffsetsView = Containers::VectorView< Index, Device, Index >; + using HostDiagonalsOffsetsView = Containers::VectorView< Index, Devices::Host, Index >; + + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing.
+ */ using IndexType = Index; - using BaseType = MatrixView< Real, Device, Index >; - //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >; - using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >; - //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; - using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >; - using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >; - using ValuesViewType = typename BaseType::ValuesView; + + /** + * \brief Type of related matrix view. + */ using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >; + + /** + * \brief Matrix view type for constant instances. + */ using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; - using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + /** + * \brief Type for accessing matrix rows. + */ + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsOffsetsView >; + /** + * \brief Helper type for getting self type or its modifications. + */ template< typename _Real = Real, typename _Device = Device, typename _Index = Index, ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > using Self = MultidiagonalMatrixView< _Real, _Device, _Index, Organization_ >; + /** + * \brief Constructor with no parameters. 
+ */ + __cuda_callable__ MultidiagonalMatrixView(); + /** + * \brief Constructor with all necessary data and views. + * + * \param values is a vector view with matrix elements values + * \param diagonalsOffsets is a vector view with diagonals offsets + * \param hostDiagonalsOffsets is a vector view with a copy of diagonals offsets on the host + * \param indexer is an indexer of matrix elements + */ + __cuda_callable__ MultidiagonalMatrixView( const ValuesViewType& values, - const DiagonalsShiftsView& diagonalsShifts, - const HostDiagonalsShiftsView& hostDiagonalsShifts, + const DiagonalsOffsetsView& diagonalsOffsets, + const HostDiagonalsOffsetsView& hostDiagonalsOffsets, const IndexerType& indexer ); + /** + * \brief Copy constructor. + * + * \param view is an input multidiagonal matrix view. + */ + __cuda_callable__ + MultidiagonalMatrixView( const MultidiagonalMatrixView& view ) = default; + + /** + * \brief Move constructor. + * + * \param view is an input multidiagonal matrix view. + */ + __cuda_callable__ + MultidiagonalMatrixView( MultidiagonalMatrixView&& view ) = default; + + /** + * \brief Returns a modifiable view of the multidiagonal matrix. + * + * \return multidiagonal matrix view. + */ ViewType getView(); + /** + * \brief Returns a non-modifiable view of the multidiagonal matrix. + * + * \return multidiagonal matrix view. + */ ConstViewType getConstView() const; + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::MultidiagonalMatrix< RealType, [any_device], IndexType, Organization, [any_allocator], [any_allocator] >`. + * + * See \ref MultidiagonalMatrix::getSerializationType. + * + * \return \ref String with the serialization type. + */ static String getSerializationType(); + /** + * \brief Returns string with serialization type. + * + * See \ref MultidiagonalMatrix::getSerializationType. + * + * \return \ref String with the serialization type.
+ */ virtual String getSerializationTypeVirtual() const; + /** + * \brief Returns number of diagonals. + * + * \return Number of diagonals. + */ __cuda_callable__ const IndexType& getDiagonalsCount() const; + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getCompressedRowLengths.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_getCompressedRowLengths.out + */ template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getNonemptyRowsCount() const; - [[deprecated]] IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowLength() const; - - IndexType getNumberOfNonzeroMatrixElements() const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + /** + * \brief Returns number of non-zero matrix elements. + * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Comparison operator with another multidiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * + * \return \e true if both matrices are identical and \e false otherwise. 
+ */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_ > bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + /** + * \brief Comparison operator with another multidiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * + * \param matrix is the source matrix. + * + * \return \e true if both matrices are NOT identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_ > bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getRow.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_getRow.out + * + * See \ref MultidiagonalMatrixRowView. + */ __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getConstRow.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_getConstRow.out + * + * See \ref MultidiagonalMatrixRowView. 
+ */ __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; + /** + * \brief Set all matrix elements to given value. + * + * \param v is the new value of all matrix elements. + */ void setValue( const RealType& v ); + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_setElement.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_setElement.out + */ + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + /** + * \brief Adds given \e value to the element at given \e row and \e column. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. 
+ * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value to be added to the element. + * \param thisElementMultiplicator is a multiplicator the original matrix element + * value is multiplied by before addition of given \e value. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_addElement.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_addElement.out + * + */ + __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref MultidiagonalMatrix::getRow + * or \ref MultidiagonalMatrix::forRows and \ref MultidiagonalMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_getElement.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_getElement.out + * + */ + __cuda_callable__ RealType getElement( const IndexType row, const IndexType column ) const; - MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view ); - + /** + * \brief Method for performing general reduction on matrix rows for constant instances. 
+ * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue in the signatures above). + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_rowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. 
+ * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue in the signatures above). + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_rowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue in the signatures above). + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. 
+ * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_allRowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchReal is type returned by the Fetch lambda function (denoted as FetchValue in the signatures above). + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as identity element. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for iteration over given range of matrix rows for constant instances. 
+ * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_forRows.out + */ template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; + /** + * \brief Method for iteration over given range of matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. 
+ * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forRows.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_forRows.out + */ template< typename Function > void forRows( IndexType first, IndexType last, Function& function ); + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances). + * + * See \ref MultidiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ) const; + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref MultidiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/MultidiagonalMatrix/MultidiagonalMatrixViewExample_forAllRows.cpp + * \par Output + * \include MultidiagonalMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. 
It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before being added + * to the result of matrix-vector product. It is zero by default. + * \param firstRow is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param lastRow is the end of the rows range for which the vector product + * is computed. It is number of the matrix rows by default. + */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0, + const IndexType firstRow = 0, + IndexType lastRow = 0 ) const; template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, @@ -153,30 +573,66 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; + /** + * \brief Assignment of exactly the same matrix type. + * + * \param view is input matrix view for the assignment. + * \return reference to this matrix. + */ + MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view ); + + /** + * \brief Method for saving the matrix to a file. + * + * \param file is the output file. + */ void save( File& file ) const; + /** + * \brief Method for saving the matrix to the file with given filename. + * + * \param fileName is name of the file. + */ void save( const String& fileName ) const; + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. 
+ */ void print( std::ostream& str ) const; + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return constant reference to the indexer. + */ __cuda_callable__ const IndexerType& getIndexer() const; + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return non-constant reference to the indexer. + */ __cuda_callable__ IndexerType& getIndexer(); + /** + * \brief Returns padding index denoting padding zero elements. + * + * These elements are used for efficient data alignment in memory. + * + * \return value of the padding index. + */ __cuda_callable__ IndexType getPaddingIndex() const; protected: - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType localIdx ) const; - - DiagonalsShiftsView diagonalsShifts; + DiagonalsOffsetsView diagonalsOffsets; - HostDiagonalsShiftsView hostDiagonalsShifts; + HostDiagonalsOffsetsView hostDiagonalsOffsets; IndexerType indexer; }; diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 8d772b6b9db525731845110ea2b0f703057a5db9..0bc3c2d36de0bfc86de4ad728e1b174b4a287b18 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include @@ -32,12 +33,12 @@ template< typename Real, ElementsOrganization Organization > MultidiagonalMatrixView< Real, Device, Index, Organization >:: MultidiagonalMatrixView( const ValuesViewType& values, - const DiagonalsShiftsView& diagonalsShifts, - const HostDiagonalsShiftsView& hostDiagonalsShifts, + const DiagonalsOffsetsView& diagonalsOffsets, + const HostDiagonalsOffsetsView& hostDiagonalsOffsets, const IndexerType& indexer ) : MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), - diagonalsShifts( diagonalsShifts ), - hostDiagonalsShifts( hostDiagonalsShifts ), + diagonalsOffsets( 
diagonalsOffsets ), + hostDiagonalsOffsets( hostDiagonalsOffsets ), indexer( indexer ) { } @@ -51,8 +52,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: getView() -> ViewType { return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(), - const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(), - const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(), + const_cast< MultidiagonalMatrixView* >( this )->diagonalsOffsets.getView(), + const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsOffsets.getView(), indexer ); } @@ -65,8 +66,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), - this->diagonalsShifts.getConstView(), - this->hostDiagonalsShifts.getConstView(), + this->diagonalsOffsets.getConstView(), + this->hostDiagonalsOffsets.getConstView(), indexer ); } @@ -78,10 +79,10 @@ String MultidiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationType() { - return String( "Matrices::Multidiagonal< " ) + + return String( "Matrices::MultidiagonalMatrix< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( Organization ? 
"true" : "false" ) + ", [any_allocator] >"; + TNL::getSerializationType( Organization ) + ", [any_allocator], [any_allocator] >"; } template< typename Real, @@ -104,7 +105,7 @@ const Index& MultidiagonalMatrixView< Real, Device, Index, Organization >:: getDiagonalsCount() const { - return this->diagonalsShifts.getSize(); + return this->diagonalsOffsets.getSize(); } template< typename Real, @@ -131,17 +132,6 @@ getCompressedRowLengths( Vector& rowLengths ) const this->allRowsReduction( fetch, reduce, keep, 0 ); } -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -MultidiagonalMatrixView< Real, Device, Index, Organization >:: -getNonemptyRowsCount() const -{ - return this->indexer.getNonemptyRowsCount(); -} - template< typename Real, typename Device, typename Index, @@ -150,7 +140,7 @@ Index MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRowLength( const IndexType row ) const { - return this->diagonalsShifts.getSize(); + return this->diagonalsOffsets.getSize(); } template< typename Real, @@ -159,18 +149,7 @@ template< typename Real, ElementsOrganization Organization > Index MultidiagonalMatrixView< Real, Device, Index, Organization >:: -getMaxRowLength() const -{ - return this->diagonalsShifts.getSize(); -} - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -MultidiagonalMatrixView< Real, Device, Index, Organization >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { @@ -192,7 +171,7 @@ operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organizatio return this->values == matrix.values; else { - TNL_ASSERT( false, "TODO" ); + TNL_ASSERT_TRUE( false, "TODO" ); } } @@ -217,7 +196,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: setValue( const 
RealType& v ) { // we dont do this->values = v here because it would set even elements 'outside' the matrix - // method getNumberOfNonzeroElements would not well + // method getNumberOfNonzeroElements would not work well then const RealType newValue = v; auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable { value = newValue; @@ -234,7 +213,7 @@ auto MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) const -> const RowView { - return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); + return RowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer ); } template< typename Real, @@ -246,13 +225,14 @@ auto MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) -> RowView { - return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); + return RowView( rowIdx, this->diagonalsOffsets.getView(), this->values.getView(), this->indexer ); } template< typename Real, typename Device, typename Index, ElementsOrganization Organization > +__cuda_callable__ void MultidiagonalMatrixView< Real, Device, Index, Organization >:: setElement( const IndexType row, const IndexType column, const RealType& value ) @@ -262,17 +242,21 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) - if( row + hostDiagonalsShifts[ i ] == column ) + for( IndexType i = 0; i < diagonalsOffsets.getSize(); i++ ) + if( row + diagonalsOffsets.getElement( i ) == column ) { - this->values.setElement( this->getElementIndex( row, i ), value ); + this->values.setElement( this->indexer.getGlobalIndex( row, i ), value ); return; } if( value != 0.0 ) { 
+#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( false, "" ); +#else std::stringstream msg; msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; throw std::logic_error( msg.str() ); +#endif } } @@ -280,6 +264,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > +__cuda_callable__ void MultidiagonalMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, @@ -292,18 +277,22 @@ addElement( const IndexType row, TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) - if( row + hostDiagonalsShifts[ i ] == column ) + for( IndexType i = 0; i < diagonalsOffsets.getSize(); i++ ) + if( row + diagonalsOffsets.getElement( i ) == column ) { - const Index idx = this->getElementIndex( row, i ); + const Index idx = this->indexer.getGlobalIndex( row, i ); this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value ); return; } if( value != 0.0 ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( false, "" ); +#else std::stringstream msg; msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; throw std::logic_error( msg.str() ); +#endif } } @@ -311,6 +300,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > +__cuda_callable__ Real MultidiagonalMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const @@ -320,9 +310,9 @@ getElement( const IndexType row, const IndexType column ) const TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) - if( row + hostDiagonalsShifts[ i ] == column ) - return this->values.getElement( this->getElementIndex( row, i ) ); + for( IndexType localIdx = 0; localIdx < 
diagonalsOffsets.getSize(); localIdx++ ) + if( row + diagonalsOffsets.getElement( localIdx ) == column ) + return this->values.getElement( this->indexer.getGlobalIndex( row, localIdx ) ); return 0.0; } @@ -335,8 +325,8 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: operator=( const MultidiagonalMatrixView& view ) { MatrixView< Real, Device, Index >::operator=( view ); - this->diagonalsShifts.bind( view.diagonalsShifts ); - this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts ); + this->diagonalsOffsets.bind( view.diagonalsOffsets ); + this->hostDiagonalsOffsets.bind( view.hostDiagonalsOffsets ); this->indexer = view.indexer; return *this; } @@ -352,8 +342,8 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke { using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); const auto values_view = this->values.getConstView(); - const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); - const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView(); + const IndexType diagonalsCount = this->diagonalsOffsets.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; const auto zero = zero_; @@ -361,7 +351,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke Real_ sum( zero ); for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { - const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) ); } @@ -370,6 +360,35 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } +template< typename Real, + typename Device, + 
typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, Organization >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) +{ + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); + const auto values_view = this->values.getConstView(); + const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView(); + const IndexType diagonalsCount = this->diagonalsOffsets.getSize(); + const IndexType columns = this->getColumns(); + const auto indexer = this->indexer; + const auto zero = zero_; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + Real_ sum( zero ); + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) + { + const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + sum = reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) ); + } + keep( rowIdx, sum ); + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + template< typename Real, typename Device, typename Index, @@ -382,6 +401,18 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); } +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, Organization >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); +} + template< typename Real, typename Device, typename Index, @@ -392,15 +423,15 @@ 
MultidiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); - const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); - const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView(); + const IndexType diagonalsCount = this->diagonalsOffsets.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { - const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); } @@ -418,15 +449,15 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); - const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); - const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const auto diagonalsOffsets_view = this->diagonalsOffsets.getConstView(); + const IndexType diagonalsCount = this->diagonalsOffsets.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ ) { - const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + const IndexType columnIdx = rowIdx + diagonalsOffsets_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) function( rowIdx, localIdx, columnIdx, values_view[ 
indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); } @@ -458,18 +489,6 @@ forAllRows( Function& function ) this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); } -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -template< typename Vector > -__cuda_callable__ -typename Vector::RealType -MultidiagonalMatrixView< Real, Device, Index, Organization >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ -} - template< typename Real, typename Device, typename Index, @@ -478,7 +497,12 @@ template< typename Real, typename OutVector > void MultidiagonalMatrixView< Real, Device, Index, Organization >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator, + const IndexType begin, + IndexType end ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." 
); @@ -491,10 +515,19 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; }; - auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - outVectorView[ row ] = value; + auto keeper1 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = matrixMultiplicator * value; + }; + auto keeper2 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; }; - this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); + + if( end == 0 ) + end = this->getRows(); + if( outVectorMultiplicator == ( RealType ) 0.0 ) + this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 ); + else + this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 ); } template< typename Real, @@ -670,14 +703,18 @@ void MultidiagonalMatrixView< Real, Device, Index, Organization >::print( std::o for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ ) { str <<"Row: " << rowIdx << " -> "; - for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ ) + for( IndexType localIdx = 0; localIdx < this->hostDiagonalsOffsets.getSize(); localIdx++ ) { - const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ]; + const IndexType columnIdx = rowIdx + this->hostDiagonalsOffsets[ localIdx ]; if( columnIdx >= 0 && columnIdx < this->columns ) { - auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) ); - if( v ) - str << " Col:" << columnIdx << "->" << v << "\t"; + auto value = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) ); + if( value ) + { + std::stringstream str_; + str_ << std::setw( 4 ) << std::right << columnIdx << ":" << std::setw( 4 ) << std::left 
<< value; + str << std::setw( 10 ) << str_.str(); + } } } str << std::endl; @@ -708,7 +745,7 @@ getIndexer() -> IndexerType& return this->indexer; } -template< typename Real, +/*template< typename Real, typename Device, typename Index, ElementsOrganization Organization > @@ -718,7 +755,7 @@ MultidiagonalMatrixView< Real, Device, Index, Organization >:: getElementIndex( const IndexType row, const IndexType localIdx ) const { return this->indexer.getGlobalIndex( row, localIdx ); -} +}*/ template< typename Real, typename Device, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 5986431374dbdab46cc7cc8b2950858c08d33579..268af8a332dafb066d6061021b193c06dbf5ddeb 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -22,6 +22,23 @@ namespace TNL { namespace Matrices { +/** + * \brief Implementation of sparse matrix, i.e. matrix storing only non-zero elements. + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam MatrixType specifies the type of matrix - its symmetry or binarity. See \ref MatrixType. + * Both symmetric and binary matrix types reduces memory consumption. Binary matrix does not store + * the matrix values explicitly since the non-zero elements can have only value equal to one. Symmetric + * matrices store only lower part of the matrix and its diagonal. The upper part is reconstructed on the fly. + * GeneralMatrix with no symmetry is used by default. + * \tparam Segments is a structure representing the sparse matrix format. Depending on the pattern of the non-zero elements + * different matrix formats can perform differently especially on GPUs. By default \ref CSR format is used. See also + * \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack. + * \tparam RealAllocator is allocator for the matrix elements values. 
+ * \tparam IndexAllocator is allocator for the matrix elements column indexes. + */ template< typename Real, typename Device = Devices::Host, typename Index = int, @@ -31,43 +48,114 @@ template< typename Real, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > { + static_assert( + ! MatrixType::isSymmetric() || + ! std::is_same< Device, Devices::Cuda >::value || + ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), + "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ); + public: + + // Supporting types - they are not important for the user + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using ConstValuesViewType = typename ValuesViewType::ConstViewType; + using ColumnsIndexesVectorType = Containers::Vector< Index, Device, Index, IndexAllocator >; + using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; + using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType; + using RowsCapacitiesType = Containers::Vector< Index, Device, Index, IndexAllocator >; + using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; + + /** + * \brief Test of symmetric matrix type. + * + * \return \e true if the matrix is stored as symmetric and \e false otherwise. + */ static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; - static constexpr bool isBinary() { return MatrixType::isBinary(); }; - static_assert( - ! isSymmetric() || - ! 
std::is_same< Device, Devices::Cuda >::value || - ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), - "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ); + /** + * \brief Test of binary matrix type. + * + * \return \e true if the matrix is stored as binary and \e false otherwise. + */ + static constexpr bool isBinary() { return MatrixType::isBinary(); }; + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ + using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ + using IndexType = Index; + + /** + * \brief Templated type of segments, i.e. sparse matrix format. + */ template< typename Device_, typename Index_, typename IndexAllocator_ > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; + + /** + * \brief Type of segments used by this matrix. It represents the sparse matrix format. + */ using SegmentsType = Segments< Device, Index, IndexAllocator >; + + /** + * \brief Templated view type of segments, i.e. sparse matrix format. + */ template< typename Device_, typename Index_ > using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >; + + /** + * \brief Type of segments view used by the related matrix view. It represents the sparse matrix format. + */ using SegmentsViewType = typename SegmentsType::ViewType; - using SegmentViewType = typename SegmentsType::SegmentViewType; - using DeviceType = Device; - using IndexType = Index; + + /** + * \brief The allocator for matrix elements values. + */ using RealAllocatorType = RealAllocator; + + /** + * \brief The allocator for matrix elements column indexes. 
+ */ using IndexAllocatorType = IndexAllocator; - using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; - using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; - using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; - using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType; - using ValuesViewType = typename ValuesVectorType::ViewType; - using ConstValuesViewType = typename ValuesViewType::ConstViewType; - using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; - using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; - using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType; + + /** + * \brief Type of related matrix view. + * + * See \ref SparseMatrixView. + */ using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; - using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; + + /** + * \brief Matrix view type for constant instances. + * + * See \ref SparseMatrixView. + */ + using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >; + + /** + * \brief Type for accessing matrix rows. + */ + using RowView = SparseMatrixRowView< typename SegmentsType::SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; + + /** + * \brief Type for accessing constant matrix rows. + */ using ConstRowView = typename RowView::ConstViewType; + /** + * \brief Helper type for getting self type or its modifications. 
+ */ template< typename _Real = Real, typename _Device = Device, typename _Index = Index, @@ -77,107 +165,606 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > typename _IndexAllocator = typename Allocators::Default< _Device >::template Allocator< _Index > > using Self = SparseMatrix< _Real, _Device, _Index, _MatrixType, _Segments, _RealAllocator, _IndexAllocator >; - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - + /** + * \brief Constructor only with values and column indexes allocators. + * + * \param realAllocator is used for allocation of matrix elements values. + * \param indexAllocator is used for allocation of matrix elements column indexes. + */ SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); - SparseMatrix( const SparseMatrix& m ) = default; + /** + * \brief Copy constructor. + * + * \param matrix is the source matrix + */ + SparseMatrix( const SparseMatrix& matrix1 ) = default; - SparseMatrix( SparseMatrix&& m ) = default; + /** + * \brief Move constructor. + * + * \param matrix is the source matrix + */ + SparseMatrix( SparseMatrix&& matrix ) = default; + /** + * \brief Constructor with matrix dimensions. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param realAllocator is used for allocation of matrix elements values. + * \param indexAllocator is used for allocation of matrix elements column indexes. 
+ */ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); - SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, - const IndexType columns, - const RealAllocatorType& realAllocator = RealAllocatorType(), - const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + /** + * \brief Constructor with matrix rows capacities and number of columns. + * + * The number of matrix rows is given by the size of \e rowCapacities list. + * + * \tparam ListIndex is the initializer list values type. + * \param rowCapacities is a list telling how many matrix elements must be + * allocated in each row. + * \param columns is the number of matrix columns. + * \param realAllocator is used for allocation of matrix elements values. + * \param indexAllocator is used for allocation of matrix elements column indexes. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp + * \par Output + * \include SparseMatrixExample_Constructor_init_list_1.out + */ + template< typename ListIndex > + explicit SparseMatrix( const std::initializer_list< ListIndex >& rowCapacities, + const IndexType columns, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); - SparseMatrix( const IndexType rows, - const IndexType columns, - const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, - const RealAllocatorType& realAllocator = RealAllocatorType(), - const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + /** + * \brief Constructor with matrix dimensions and data in initializer list. + * + * The matrix elements values are given as a list \e data of triples: + * { { row1, column1, value1 }, + * { row2, column2, value2 }, + * ... }. + * + * \param rows is number of matrix rows. 
+ * \param columns is number of matrix columns. + * \param data is a list of matrix elements values. + * \param realAllocator is used for allocation of matrix elements values. + * \param indexAllocator is used for allocation of matrix elements column indexes. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp + * \par Output + * \include SparseMatrixExample_Constructor_init_list_2.out + */ + explicit SparseMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + /** + * \brief Constructor with matrix dimensions and data in std::map. + * + * The matrix elements values are given as a map \e data where keys are + * std::pair of matrix coordinates ( {row, column} ) and value is the + * matrix element value. + * + * \tparam MapIndex is a type for indexing rows and columns. + * \tparam MapValue is a type for matrix elements values in the map. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + * \param map is std::map containing matrix elements. + * \param realAllocator is used for allocation of matrix elements values. + * \param indexAllocator is used for allocation of matrix elements column indexes. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp + * \par Output + * \include SparseMatrixExample_Constructor_std_map.out + */ template< typename MapIndex, typename MapValue > explicit SparseMatrix( const IndexType rows, const IndexType columns, - const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ); - - virtual void setDimensions( const IndexType rows, - const IndexType columns ) override; + const std::map< std::pair< MapIndex, MapIndex >, MapValue >& map, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + /** + * \brief Returns a modifiable view of the sparse matrix. + * + * See \ref SparseMatrixView. + * + * \return sparse matrix view. + */ ViewType getView() const; // TODO: remove const + /** + * \brief Returns a non-modifiable view of the sparse matrix. + * + * See \ref SparseMatrixView. + * + * \return sparse matrix view. + */ ConstViewType getConstView() const; + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::SparseMatrix< RealType, [any_device], IndexType, General/Symmetric, Format, [any_allocator] >`. + * + * \return \ref String with the serialization type. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp + * \par Output + * \include SparseMatrixExample_getSerializationType.out + */ static String getSerializationType(); + /** + * \brief Returns string with serialization type. + * + * See \ref SparseMatrix::getSerializationType. + * + * \return \e String with the serialization type. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp + * \par Output + * \include SparseMatrixExample_getSerializationType.out + */ virtual String getSerializationTypeVirtual() const; + /** + * \brief Set number of rows and columns of this matrix. 
+ * + * \param rows is the number of matrix rows. + * \param columns is the number of matrix columns. + */ + virtual void setDimensions( const IndexType rows, + const IndexType columns ) override; + + /** + * \brief Set the number of matrix rows and columns by the given matrix. + * + * \tparam Matrix is matrix type. This can be any matrix having methods + * \ref getRows and \ref getColumns. + * + * \param matrix in the input matrix dimensions of which are to be adopted. + */ + template< typename Matrix > + void setLike( const Matrix& matrix ); + + /** + * \brief Allocates memory for non-zero matrix elements. + * + * The size of the input vector must be equal to the number of matrix rows. + * The number of allocated matrix elements for each matrix row depends on + * the sparse matrix format. Some formats may allocate more elements than + * required. + * + * \tparam RowsCapacitiesVector is a type of vector/array used for row + * capacities setting. + * + * \param rowCapacities is a vector telling the number of required non-zero + * matrix elements in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp + * \par Output + * \include SparseMatrixExample_setRowCapacities.out + */ template< typename RowsCapacitiesVector > void setRowCapacities( const RowsCapacitiesVector& rowCapacities ); // TODO: Remove this when possible template< typename RowsCapacitiesVector > + [[deprecated]] void setCompressedRowLengths( const RowsCapacitiesVector& rowLengths ) { this->setRowCapacities( rowLengths ); }; + /** + * \brief This method sets the sparse matrix elements from initializer list. + * + * The number of matrix rows and columns must be set already. + * The matrix elements values are given as a list \e data of triples: + * { { row1, column1, value1 }, + * { row2, column2, value2 }, + * ... }. + * + * \param data is a initializer list of initializer lists representing + * list of matrix rows. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp + * \par Output + * \include SparseMatrixExample_setElements.out + */ void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ); + /** + * \brief This method sets the sparse matrix elements from std::map. + * + * The matrix elements values are given as a map \e data where keys are + * std::pair of matrix coordinates ( {row, column} ) and value is the + * matrix element value. + * + * \tparam MapIndex is a type for indexing rows and columns. + * \tparam MapValue is a type for matrix elements values in the map. + * + * \param map is std::map containing matrix elements. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp + * \par Output + * \include SparseMatrixExample_setElements_map.out + */ template< typename MapIndex, typename MapValue > void setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ); + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp + * \par Output + * \include SparseMatrixExample_getCompressedRowLengths.out + */ template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; + /** + * \brief Returns capacity of given matrix row. + * + * \param row index of matrix row. + * \return number of matrix elements allocated for the row. + */ __cuda_callable__ IndexType getRowCapacity( const IndexType row ) const; - template< typename Matrix > - void setLike( const Matrix& matrix ); - - IndexType getNumberOfNonzeroMatrixElements() const; + /** + * \brief Returns number of non-zero matrix elements. 
+ * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + /** + * \brief Resets the matrix to zero dimensions. + */ void reset(); + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp + * \par Output + * \include SparseMatrixExample_getConstRow.out + * + * See \ref SparseMatrixRowView. + */ __cuda_callable__ const ConstRowView getRow( const IndexType& rowIdx ) const; + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp + * \par Output + * \include SparseMatrixExample_getRow.out + * + * See \ref SparseMatrixRowView. + */ __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. 
+ * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp + * \par Output + * \include SparseMatrixExample_setElement.out + */ __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + /** + * \brief Adds given \e value to the element at given \e row and \e column. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value to be added to the element. + * \param thisElementMultiplicator is a multiplicator the original matrix element + * value is multiplied by before addition of given \e value. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp + * \par Output + * \include SparseMatrixExample_addElement.out + * + */ __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ); + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. 
If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp + * \par Output + * \include SparseMatrixExample_getElement.out + * + */ __cuda_callable__ RealType getElement( const IndexType row, const IndexType column ) const; - /*template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const;*/ + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. 
+ * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp + * \par Output + * \include SparseMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp + * \par Output + * \include SparseMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - /*** - * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp + * \par Output + * \include SparseMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows for constant instances. 
+ * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non-void type. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp + * \par Output + * \include SparseMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed.
+ * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp + * \par Output + * \include SparseMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ) const; + + /** + * \brief Method for iteration over all matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp + * \par Output + * \include SparseMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ); + + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances). + * + * See \ref SparseMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp + * \par Output + * \include SparseMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ) const; + + /** + * \brief This method calls \e forRows for all matrix rows.
+ * + * See \ref SparseMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp + * \par Output + * \include SparseMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ); + + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default.
*/ template< typename InVector, typename OutVector > @@ -198,24 +785,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealType& matrixMultiplicator = 1.0 ); */ - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); - - template< typename Function > - void forAllRows( Function& function ) const; - - template< typename Function > - void forAllRows( Function& function ); - template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, @@ -224,51 +793,116 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Assignment of exactly the same matrix type. - * @param matrix - * @return + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. */ SparseMatrix& operator=( const SparseMatrix& matrix ); /** * \brief Assignment of dense matrix + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. */ template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization, typename RealAllocator_ > SparseMatrix& operator=( const DenseMatrix< Real_, Device_, Index_, Organization, RealAllocator_ >& matrix ); /** - * \brief Assignment of any other matrix type. - * @param matrix - * @return + * \brief Assignment of any matrix type other than this and dense. + * .
+ * \param matrix is input matrix for the assignment. + * \return reference to this matrix. */ template< typename RHSMatrix > SparseMatrix& operator=( const RHSMatrix& matrix ); + /** + * \brief Comparison operator with another arbitrary matrix type. + * + * \param m is the right-hand side matrix. + * \return \e true if the RHS matrix is equal, \e false otherwise. + */ template< typename Matrix > bool operator==( const Matrix& m ) const; + /** + * \brief Comparison operator with another arbitrary matrix type. + * + * \param m is the right-hand side matrix. + * \return \e true if the RHS matrix is not equal, \e false otherwise. + */ template< typename Matrix > bool operator!=( const Matrix& m ) const; - void save( File& file ) const; - - void load( File& file ); - + /** + * \brief Method for saving the matrix to the file with given filename. + * + * \param fileName is name of the file. + */ void save( const String& fileName ) const; + /** + * \brief Method for loading the matrix from the file with given filename. + * + * \param fileName is name of the file. + */ void load( const String& fileName ); + /** + * \brief Method for saving the matrix to a file. + * + * \param file is the file where the matrix will be saved. + */ + void save( File& file ) const; + + /** + * \brief Method for loading the matrix from a file. + * + * \param file is the file from which the matrix will be loaded. + */ + void load( File& file ); + + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. + */ void print( std::ostream& str ) const; + /** + * \brief Returns a padding index value. + * + * Padding index is used for column indexes of padding zeros. Padding zeros + * are used in some sparse matrix formats for better data alignment in memory. + * + * \return value of the padding index. + */ __cuda_callable__ IndexType getPaddingIndex() const; + /** + * \brief Getter of segments for non-constant instances.
+ * + * \e Segments are a structure for addressing the matrix elements columns and values. + * In fact, \e Segments represent the sparse matrix format. + * + * \return Non-constant reference to segments. + */ SegmentsType& getSegments(); + /** + * \brief Getter of segments for constant instances. + * + * \e Segments are a structure for addressing the matrix elements columns and values. + * In fact, \e Segments represent the sparse matrix format. + * + * \return Constant reference to segments. + */ const SegmentsType& getSegments() const; -// TODO: restore it and also in Matrix -// protected: + protected: ColumnsIndexesVectorType columnIndexes; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index d13537cefaf3aaf409cd1ecc91f1f09ad7cf2e8d..d60d49611dbfff4cad01049d8fdc4ed7657b19cc 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -57,14 +57,15 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > + template< typename ListIndex > SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, +SparseMatrix( const std::initializer_list< ListIndex >& rowCapacities, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator ) { - this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) ); + this->setRowCapacities( RowsCapacitiesType( rowCapacities ) ); } template< typename Real, @@ -97,29 +98,15 @@ template< typename Real, SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const IndexType rows, const IndexType columns, - const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ) + const std::map< std::pair< 
MapIndex, MapIndex > , MapValue >& map, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { this->setDimensions( rows, columns ); this->setElements( map ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -void -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setDimensions( const IndexType rows, - const IndexType columns ) -{ - BaseType::setDimensions( rows, columns ); - segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( rows, 0 ) ); - this->view = this->getView(); -} - template< typename Real, typename Device, typename Index, @@ -167,10 +154,7 @@ String SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getSerializationType() { - return String( "Matrices::SparseMatrix< " ) + - TNL::getSerializationType< RealType >() + ", " + - TNL::getSerializationType< SegmentsType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; + return ViewType::getSerializationType(); } template< typename Real, @@ -187,6 +171,41 @@ getSerializationTypeVirtual() const return this->getSerializationType(); } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns ) +{ + BaseType::setDimensions( rows, columns ); + segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( rows, 0 ) ); + this->view = this->getView(); +} + +template< typename Real, + 
typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Matrix_ > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setLike( const Matrix_& matrix ) +{ + BaseType::setLike( matrix ); + this->segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( matrix.getRows(), 0 ) ), + this->view = this->getView(); + TNL_ASSERT_EQ( this->getRows(), segments.getSegmentsCount(), "mismatched segments count" ); +} + template< typename Real, typename Device, typename Index, @@ -320,24 +339,6 @@ getRowCapacity( const IndexType row ) const return this->view.getRowCapacity( row ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > - template< typename Matrix_ > -void -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setLike( const Matrix_& matrix ) -{ - BaseType::setLike( matrix ); - this->segments.setSegmentsSizes( Containers::Vector< IndexType, DeviceType, IndexType >( matrix.getRows(), 0 ) ), - this->view = this->getView(); - TNL_ASSERT_EQ( this->getRows(), segments.getSegmentsCount(), "mismatched segments count" ); -} - template< typename Real, typename Device, typename Index, @@ -347,9 +348,9 @@ template< typename Real, typename IndexAllocator > Index SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { - return this->view.getNumberOfNonzeroMatrixElements(); + return this->view.getNonzeroElementsCount(); } template< typename Real, @@ -446,23 +447,6 @@ getElement( const IndexType row, return this->view.getElement( row, column ); } -/*template< typename Real, - typename 
Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -rowVectorProduct( const IndexType row, - const Vector& vector ) const -{ - return this->view.rowVectorProduct( row, vector ); -}*/ - template< typename Real, typename Device, typename Index, @@ -494,9 +478,39 @@ template< typename Real, template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) +{ + this->view.rowsReduction( begin, end, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const { - this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); + this->view.rowsReduction( begin, end, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal 
> +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -524,9 +538,9 @@ template< typename Real, template< typename Function > void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -forRows( IndexType first, IndexType last, Function& function ) const +forRows( IndexType begin, IndexType end, Function& function ) const { - this->view.forRows( first, last, function ); + this->view.forRows( begin, end, function ); } template< typename Real, @@ -539,9 +553,9 @@ template< typename Real, template< typename Function > void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -forRows( IndexType first, IndexType last, Function& function ) +forRows( IndexType begin, IndexType end, Function& function ) { - this->view.forRows( first, last, function ); + this->view.forRows( begin, end, function ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h index c859655ef5ba00fa5ed759e01b6a126dd4fd2324..71555ab399329cb3037216ad9de280def575a871 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.h +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -17,6 +17,26 @@ namespace TNL { namespace Matrices { +/** + * \brief RowView is a simple structure for accessing rows of sparse matrix. + * + * \tparam SegmentView is a segment view of segments representing the matrix format. + * \tparam ValuesView is a vector view storing the matrix elements values. + * \tparam ColumnsIndexesView is a vector view storing the column indexes of the matrix element. + * \tparam isBinary tells if the the parent matrix is a binary matrix. + * + * See \ref SparseMatrix and \ref SparseMatrixView. 
+ * + * \par Example + * \include Matrices/SparseMatrixExample_getRow.cpp + * \par Output + * \include SparseMatrixExample_getRow.out + * + * \par Example + * \include Matrices/SparseMatrixViewExample_getRow.cpp + * \par Output + * \include SparseMatrixViewExample_getRow.out + */ template< typename SegmentView, typename ValuesView, typename ColumnsIndexesView, @@ -25,46 +45,152 @@ class SparseMatrixRowView { public: + /** + * \brief The type of matrix elements. + */ using RealType = typename ValuesView::RealType; + + /** + * \brief The type used for matrix elements indexing. + */ + using IndexType = typename ColumnsIndexesView::IndexType; + + /** + * \brief Type representing matrix row format. + */ using SegmentViewType = SegmentView; - using IndexType = typename SegmentViewType::IndexType; + + /** + * \brief Type of container view used for storing the matrix elements values. + */ using ValuesViewType = ValuesView; + + /** + * \brief Type of container view used for storing the column indexes of the matrix elements. + */ using ColumnsIndexesViewType = ColumnsIndexesView; + + /** + * \brief Type of constant container view used for storing the matrix elements values. + */ using ConstValuesViewType = typename ValuesViewType::ConstViewType; + + /** + * \brief Type of constant container view used for storing the column indexes of the matrix elements. + */ using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType; + + /** + * \brief Type of constant sparse matrix row view. + */ using ConstViewType = SparseMatrixRowView< SegmentView, ConstValuesViewType, ConstColumnsIndexesViewType, isBinary_ >; + /** + * \brief Tells whether the parent matrix is a binary matrix. + * \return \e true if the parent matrix is binary and \e false otherwise. + */ static constexpr bool isBinary() { return isBinary_; }; + /** + * \brief Constructor with \e segmentView, \e values and \e columnIndexes. + * + * \param segmentView instance of SegmentViewType representing matrix row.
+ * \param values is a container view for storing the matrix elements values. + * \param columnIndexes is a container view for storing the column indexes of the matrix elements. + */ __cuda_callable__ SparseMatrixRowView( const SegmentViewType& segmentView, const ValuesViewType& values, const ColumnsIndexesViewType& columnIndexes ); + /** + * \brief Returns size of the matrix row, i.e. number of matrix elements in this row. + * + * \return Size of the matrix row. + */ __cuda_callable__ IndexType getSize() const; + /** + * \brief Returns constant reference to a column index of an element with given rank in the row. + * + * \param localIdx is the rank of the non-zero element in given row. + * + * \return constant reference to the matrix element column index. + */ __cuda_callable__ const IndexType& getColumnIndex( const IndexType localIdx ) const; + /** + * \brief Returns non-constant reference to a column index of an element with given rank in the row. + * + * \param localIdx is the rank of the non-zero element in given row. + * + * \return non-constant reference to the matrix element column index. + */ __cuda_callable__ IndexType& getColumnIndex( const IndexType localIdx ); + /** + * \brief Returns constant reference to value of an element with given rank in the row. + * + * \param localIdx is the rank of the non-zero element in given row. + * + * \return constant reference to the matrix element value. + */ __cuda_callable__ const RealType& getValue( const IndexType localIdx ) const; + /** + * \brief Returns non-constant reference to value of an element with given rank in the row. + * + * \param localIdx is the rank of the non-zero element in given row. + * + * \return non-constant reference to the matrix element value. + */ __cuda_callable__ RealType& getValue( const IndexType localIdx ); + /** + * \brief Sets a value of matrix element with given rank in the matrix row. + * + * \param localIdx is the rank of the matrix element in the row.
+ * \param value is the new value of the matrix element. + */ __cuda_callable__ void setValue( const IndexType localIdx, const RealType& value ); + /** + * \brief Sets a column index of matrix element with given rank in the matrix row. + * + * \param localIdx is the rank of the matrix element in the row. + * \param columnIndex is the new column index of the matrix element. + */ + __cuda_callable__ + void setColumnIndex( const IndexType localIdx, + const RealType& columnIndex ); + + /** + * \brief Sets both a value and a column index of matrix element with given rank in the matrix row. + * + * \param localIdx is the rank of the matrix element in the row. + * \param columnIndex is the new column index of the matrix element. + * \param value is the new value of the matrix element. + */ __cuda_callable__ void setElement( const IndexType localIdx, - const IndexType column, + const IndexType columnIndex, const RealType& value ); + /** + * \brief Comparison of two matrix rows. + * + * The other matrix row can be from any other matrix. + * + * \param other is another matrix row. + * \return \e true if both rows are the same, \e false otherwise. + */ template< typename _SegmentView, typename _ValuesView, typename _ColumnsIndexesView, @@ -81,6 +207,13 @@ class SparseMatrixRowView ColumnsIndexesViewType columnIndexes; }; +/** + * \brief Insertion operator for a sparse matrix row. + * + * \param str is an output stream. + * \param row is an input sparse matrix row. + * \return reference to the output stream. 
+ */ template< typename SegmentView, typename ValuesView, typename ColumnsIndexesView, diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index 545e395fc309cc193fe6f0ed58bff92f7218a6de..a0a9c2604b8ee61d4fa7334f4b550756ea7fffcf 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -140,7 +140,7 @@ operator==( const SparseMatrixRowView< _SegmentView, _ValuesView, _ColumnsIndexe while( i < getSize() && i < other.getSize() ) { if( getColumnIndex( i ) != other.getColumnIndex( i ) ) return false; - if( getValue( i ) != other.getValue( i ) ) + if( ! _isBinary && getValue( i ) != other.getValue( i ) ) return false; ++i; } @@ -163,7 +163,11 @@ std::ostream& operator<<( std::ostream& str, const SparseMatrixRowView< SegmentV { using NonConstIndex = std::remove_const_t< typename SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >::IndexType >; for( NonConstIndex i = 0; i < row.getSize(); i++ ) - str << " [ " << row.getColumnIndex( i ) << " ] = " << row.getValue( i ) << ", "; + if( isBinary_ ) + // TODO: check getPaddingIndex(), print only the column indices of non-zeros but not the values + str << " [ " << row.getColumnIndex( i ) << " ] = " << (row.getColumnIndex( i ) >= 0) << ", "; + else + str << " [ " << row.getColumnIndex( i ) << " ] = " << row.getValue( i ) << ", "; return str; } diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 6bba7d4144618a41704d74ec08f91be04a1d2416..d80db7717bb6f77480da546f567d7a3f808a3268 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -19,6 +19,24 @@ namespace TNL { namespace Matrices { +/** + * \brief Implementation of sparse matrix view. + * + * It serves as an accessor to \ref SparseMatrix for example when passing the + * matrix to lambda functions. SparseMatrix view can be also created in CUDA kernels. 
+ * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam MatrixType specifies the type of matrix - its symmetry or binarity. See \ref MatrixType. + * Both symmetric and binary matrix types reduce memory consumption. Binary matrix does not store + * the matrix values explicitly since the non-zero elements can have only value equal to one. Symmetric + * matrices store only lower part of the matrix and its diagonal. The upper part is reconstructed on the fly. + * GeneralMatrix with no symmetry is used by default. + * \tparam SegmentsView is a structure representing the sparse matrix format. Depending on the pattern of the non-zero elements + * different matrix formats can perform differently especially on GPUs. By default \ref CSR format is used. See also + * \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack. + */ template< typename Real, typename Device = Devices::Host, typename Index = int, @@ -26,37 +44,108 @@ template< typename Real, template< typename Device_, typename Index_ > class SegmentsView = Containers::Segments::CSRView > class SparseMatrixView : public MatrixView< Real, Device, Index > { + static_assert( + ! MatrixType::isSymmetric() || + ! std::is_same< Device, Devices::Cuda >::value || + ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), + "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations."
); + public: + + // Supporting types - they are not important for the user + using BaseType = MatrixView< Real, Device, Index >; + using ValuesViewType = typename BaseType::ValuesView; + using ConstValuesViewType = typename ValuesViewType::ConstViewType; + using ColumnsIndexesViewType = Containers::VectorView< Index, Device, Index >; + using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType; + using RowsCapacitiesView = Containers::VectorView< Index, Device, Index >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; + + /** + * \brief Test of symmetric matrix type. + * + * \return \e true if the matrix is stored as symmetric and \e false otherwise. + */ static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + + /** + * \brief Test of binary matrix type. + * + * \return \e true if the matrix is stored as binary and \e false otherwise. + */ static constexpr bool isBinary() { return MatrixType::isBinary(); }; + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ + using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ + using IndexType = Index; + + /** + * \brief Templated type of segments view, i.e. sparse matrix format. + */ template< typename Device_, typename Index_ > using SegmentsViewTemplate = SegmentsView< Device_, Index_ >; + + /** + * \brief Type of segments view used by this matrix. It represents the sparse matrix format. 
+ */ using SegmentsViewType = SegmentsView< Device, Index >; - using SegmentViewType = typename SegmentsViewType::SegmentViewType; - using DeviceType = Device; - using IndexType = Index; - using BaseType = MatrixView< Real, Device, Index >; - using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; - using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; - using ValuesViewType = typename BaseType::ValuesView; - using ConstValuesViewType = typename ValuesViewType::ConstViewType; - using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; - using ConstColumnsIndexesViewType = typename ColumnsIndexesViewType::ConstViewType; - using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; - using ConstRowView = typename RowView::ConstViewType; - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + /** + * \brief Type of related matrix view. + */ + using ViewType = SparseMatrixView< std::remove_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >; + + /** + * \brief Matrix view type for constant instances. + */ + using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, std::add_const_t< Index >, MatrixType, SegmentsViewTemplate >; + + /** + * \brief Type for accessing matrix rows. 
+ */ + using RowView = SparseMatrixRowView< typename SegmentsViewType::SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; + + /** + * \brief Type for accessing constant matrix rows. + */ + using ConstRowView = typename RowView::ConstViewType; + /** + * \brief Helper type for getting self type or its modifications. + */ + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + typename _MatrixType = MatrixType, + template< typename, typename > class _SegmentsView = SegmentsView > + using Self = SparseMatrixView< _Real, _Device, _Index, _MatrixType, _SegmentsView >; + + /** + * \brief Constructor with no parameters. + */ __cuda_callable__ SparseMatrixView(); + /** + * \brief Constructor with all necessary data and views. + * + * \param rows is a number of matrix rows. + * \param columns is a number of matrix columns. + * \param values is a vector view with matrix elements values. + * \param columnIndexes is a vector view with matrix elements column indexes. + * \param segments is a segments view representing the sparse matrix format. + */ __cuda_callable__ SparseMatrixView( const IndexType rows, const IndexType columns, @@ -64,88 +153,423 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > const ColumnsIndexesViewType& columnIndexes, const SegmentsViewType& segments ); + /** + * \brief Copy constructor. + * + * \param matrix is an input sparse matrix view. + */ __cuda_callable__ - SparseMatrixView( const SparseMatrixView& m ) = default; + SparseMatrixView( const SparseMatrixView& matrix ) = default; - //__cuda_callable__ - //SparseMatrixView( const SparseMatrixView&& m ) = default; + /** + * \brief Move constructor. + * + * \param matrix is an input sparse matrix view. + */ + __cuda_callable__ + SparseMatrixView( SparseMatrixView&& matrix ) = default; + /** + * \brief Returns a modifiable view of the sparse matrix. + * + * \return sparse matrix view. 
+ */ __cuda_callable__ ViewType getView(); + /** + * \brief Returns a non-modifiable view of the sparse matrix. + * + * \return sparse matrix view. + */ __cuda_callable__ ConstViewType getConstView() const; + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::SparseMatrix< RealType, [any_device], IndexType, General/Symmetric, Format, [any_allocator] >`. + * + * \return \ref String with the serialization type. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp + * \par Output + * \include SparseMatrixViewExample_getSerializationType.out + */ static String getSerializationType(); + /** + * \brief Returns string with serialization type. + * + * See \ref SparseMatrix::getSerializationType. + * + * \return \e String with the serialization type. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp + * \par Output + * \include SparseMatrixExample_getSerializationType.out + */ virtual String getSerializationTypeVirtual() const; + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp + * \par Output + * \include SparseMatrixViewExample_getCompressedRowLengths.out + */ template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; + /** + * \brief Returns capacity of given matrix row. + * + * \param row index of matrix row. + * \return number of matrix elements allocated for the row. + */ __cuda_callable__ IndexType getRowCapacity( const IndexType row ) const; - IndexType getNumberOfNonzeroMatrixElements() const; - - void reset(); - + /** + * \brief Returns number of non-zero matrix elements. 
+ * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp + * \par Output + * \include SparseMatrixViewExample_getConstRow.out + * + * See \ref SparseMatrixRowView. + */ __cuda_callable__ ConstRowView getRow( const IndexType& rowIdx ) const; + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp + * \par Output + * \include SparseMatrixViewExample_getRow.out + * + * See \ref SparseMatrixRowView. + */ __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp + * \par Output + * \include SparseMatrixViewExample_setElement.out + */ __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + /** + * \brief Add element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. + * \param thisElementMultiplicator is multiplicator the original matrix element + * value is multiplied by before addition of given \e value. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_addElement.cpp + * \par Output + * \include SparseMatrixViewExample_addElement.out + * + */ __cuda_callable__ void addElement( IndexType row, IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. 
For higher performance see \ref SparseMatrix::getRow + * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_getElement.cpp + * \par Output + * \include SparseMatrixViewExample_getElement.out + * + */ __cuda_callable__ RealType getElement( IndexType row, IndexType column ) const; - /*template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const;*/ - - /*** - * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp + * \par Output + * \include SparseMatrixViewExample_rowsReduction.out */ - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector, - const RealType matrixMultiplicator = 1.0, - const RealType outVectorMultiplicator = 0.0, - const IndexType firstRow = 0, - IndexType lastRow = 0 ) const; - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_rowsReduction.cpp + * \par Output + * \include SparseMatrixViewExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include SparseMatrixViewExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows for constant instances. 
+ * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include SparseMatrixViewExample_allRowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. 
+ * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp + * \par Output + * \include SparseMatrixViewExample_forRows.out + */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; - + void forRows( IndexType begin, IndexType end, Function& function ) const; + + /** + * \brief Method for iteration over all matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_forRows.cpp + * \par Output + * \include SparseMatrixViewExample_forRows.out + */ template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); - + void forRows( IndexType begin, IndexType end, Function& function ); + + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances). + * + * See \ref SparseMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. 
+ * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp + * \par Output + * \include SparseMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ) const; + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref SparseMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/SparseMatrix/SparseMatrixViewExample_forAllRows.cpp + * \par Output + * \include SparseMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ); + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default. 
+ */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0, + const IndexType begin = 0, + IndexType end = 0 ) const; + template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 1aa4289dc778501f0f3cc52759e8bed1db82d4b7..5475faa56aed9c2c50128e8f0fdd3da4e1c94187 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -92,7 +92,8 @@ getSerializationType() return String( "Matrices::SparseMatrix< " ) + TNL::getSerializationType< RealType >() + ", " + TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; + TNL::getSerializationType< IndexType >() + ", " + + MatrixType::getSerializationType() + ", [any_allocator], [any_allocator] >"; } template< typename Real, @@ -120,7 +121,7 @@ getCompressedRowLengths( Vector& rowLengths ) const details::set_size_if_resizable( rowLengths, this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { @@ -149,7 +150,7 @@ template< typename Real, template< typename, typename > class SegmentsView > Index SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { const auto columns_view = this->columnIndexes.getConstView(); const IndexType paddingIndex 
= this->getPaddingIndex(); @@ -174,9 +175,6 @@ getNumberOfNonzeroMatrixElements() const return 0.0; return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements }; - //auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) { - // sum += value; - //}; auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable { row_sums_view[ row ] = value; }; @@ -347,22 +345,6 @@ getElement( IndexType row, return 0.0; } -/*template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -rowVectorProduct( const IndexType row, - const Vector& vector ) const -{ - TNL_ASSERT_TRUE( false, "TODO: rowVectorProduct is not implemented yet."); - return 0; -}*/ - template< typename Real, typename Device, typename Index, @@ -442,23 +424,63 @@ template< typename Real, template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) +{ + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + IndexType& columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + { + if( isBinary() ) + return fetch( rowIdx, columnIdx, 1 ); + else + return fetch( rowIdx, 
columnIdx, values_view[ globalIdx ] ); + } + return zero; + }; + this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) { if( isBinary() ) - return fetch( rowIdx, columnIdx, globalIdx, 1 ); + return fetch( rowIdx, columnIdx, 1 ); else - return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); } return zero; }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + this->segments.segmentsReduction( begin, end, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +allRowsReduction( Fetch& fetch, const Reduce& reduce, 
Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -482,7 +504,7 @@ template< typename Real, template< typename Function > void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -forRows( IndexType first, IndexType last, Function& function ) const +forRows( IndexType begin, IndexType end, Function& function ) const { const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); @@ -494,7 +516,7 @@ forRows( IndexType first, IndexType last, Function& function ) const function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); return true; }; - this->segments.forSegments( first, last, f ); + this->segments.forSegments( begin, end, f ); } template< typename Real, @@ -505,7 +527,7 @@ template< typename Real, template< typename Function > void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -forRows( IndexType first, IndexType last, Function& function ) +forRows( IndexType begin, IndexType end, Function& function ) { auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); @@ -519,7 +541,7 @@ forRows( IndexType first, IndexType last, Function& function ) else function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); }; - this->segments.forSegments( first, last, f ); + this->segments.forSegments( begin, end, f ); } template< typename Real, @@ -709,7 +731,12 @@ print( std::ostream& str ) const value = ( RealType ) 1.0; else value = this->values.getElement( globalIdx ); - str << " Col:" << column << "->" << value << "\t"; + if( value ) + { + std::stringstream str_; + str_ << std::setw( 4 ) << std::right << column << ":" << std::setw( 4 ) << std::left << value; + str << std::setw( 10 ) << str_.str(); + } } str << std::endl; } diff --git a/src/TNL/Matrices/Tridiagonal.h 
b/src/TNL/Matrices/Tridiagonal.h deleted file mode 100644 index 25472aa3c9bedd726683e23e8237a27cd1385494..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/Tridiagonal.h +++ /dev/null @@ -1,196 +0,0 @@ -/*************************************************************************** - Tridiagonal.h - description - ------------------- - begin : Nov 30, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace TNL { -namespace Matrices { - -template< typename Real = double, - typename Device = Devices::Host, - typename Index = int, - ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), - typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > -class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > -{ - public: - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - using RealAllocatorType = RealAllocator; - using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >; - using ValuesVectorType = typename BaseType::ValuesVectorType; - using ValuesViewType = typename ValuesVectorType::ViewType; - using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >; - using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; - using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; - - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, 
DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Tridiagonal< _Real, _Device, _Index >; - - static constexpr ElementsOrganization getOrganization() { return Organization; }; - - Tridiagonal(); - - Tridiagonal( const IndexType rows, const IndexType columns ); - - ViewType getView() const; // TODO: remove const - - //ConstViewType getConstView() const; - - static String getSerializationType(); - - virtual String getSerializationTypeVirtual() const; - - void setDimensions( const IndexType rows, - const IndexType columns ); - - //template< typename Vector > - void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); - - template< typename Vector > - void getCompressedRowLengths( Vector& rowLengths ) const; - - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; - - IndexType getMaxRowLength() const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - void setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ); - - IndexType getNumberOfNonzeroMatrixElements() const; - - void reset(); - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - bool operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - bool operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - - __cuda_callable__ - RowView getRow( const IndexType& rowIdx ); - - __cuda_callable__ - const RowView getRow( const IndexType& rowIdx ) const; - 
- void setValue( const RealType& v ); - - void setElement( const IndexType row, - const IndexType column, - const RealType& value ); - - void addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - RealType getElement( const IndexType row, - const IndexType column ) const; - - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > - void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ) const; - - template< typename Function > - void forRows( IndexType first, IndexType last, Function& function ); - - template< typename Function > - void forAllRows( Function& function ) const; - - template< typename Function > - void forAllRows( Function& function ); - - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - void addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); - - template< typename Real2, typename Index2 > - void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); - - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void 
performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; - - // copy assignment - Tridiagonal& operator=( const Tridiagonal& matrix ); - - // cross-device copy assignment - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > - Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ); - - void save( File& file ) const; - - void load( File& file ); - - void save( const String& fileName ) const; - - void load( const String& fileName ); - - void print( std::ostream& str ) const; - - const IndexerType& getIndexer() const; - - IndexerType& getIndexer(); - - __cuda_callable__ - IndexType getPaddingIndex() const; - - protected: - - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType localIdx ) const; - - IndexerType indexer; - - ViewType view; -}; - -} // namespace Matrices -} // namespace TNL - -#include diff --git a/src/TNL/Matrices/TridiagonalMatrix.h b/src/TNL/Matrices/TridiagonalMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..afd1392387eaf84fa901877e1a41f07bbdc6aa77 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrix.h @@ -0,0 +1,811 @@ +/*************************************************************************** + TridiagonalMatrix.h - description + ------------------- + begin : Nov 30, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +/** + * \brief Implementation of sparse tridiagonal matrix. 
+ * + * Use this matrix type for storing of tridiagonal matrices i.e., matrices having + * non-zero matrix elements only on its diagonal and immediately above and below the diagonal. + * This is an example: + * \f[ + * \left( + * \begin{array}{ccccccc} + * 4 & -1 & . & . & . & . \\ + * -1 & 4 & -1 & . & . & . \\ + * . & -1 & 4 & -1 & . & . \\ + * . & . & -1 & 4 & -1 & . \\ + * . & . & . & -1 & 4 & -1 \\ + * . & . & . & . & -1 & 4 + * \end{array} + * \right) + * \f] + * + * Advantage is that we do not store the column indexes + * explicitly as it is in \ref SparseMatrix. This can reduce significantly the + * memory requirements which also means better performance. See the following table + * for the storage requirements comparison between \ref TridiagonalMatrix and \ref SparseMatrix. + * + * Data types | SparseMatrix | TridiagonalMatrix | Ratio + * --------------------|----------------------|---------------------|-------- + * float + 32-bit int | 8 bytes per element | 4 bytes per element | 50% + * double + 32-bit int| 12 bytes per element | 8 bytes per element | 67% + * float + 64-bit int | 12 bytes per element | 4 bytes per element | 33% + * double + 64-bit int| 16 bytes per element | 8 bytes per element | 50% + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. + * \tparam RealAllocator is allocator for the matrix elements. 
+ */ +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > +class TridiagonalMatrix : public Matrix< Real, Device, Index, RealAllocator > +{ + public: + + // Supporting types - they are not important for the user + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using IndexerType = details::TridiagonalMatrixIndexer< Index, Organization >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + + /** + * \brief The type of matrix elements. + */ + using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ + using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ + using IndexType = Index; + + /** + * \brief The allocator for matrix elements values. + */ + using RealAllocatorType = RealAllocator; + + /** + * \brief Type of related matrix view. + * + * See \ref TridiagonalMatrixView. + */ + using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >; + + /** + * \brief Matrix view type for constant instances. + * + * See \ref TridiagonalMatrixView. + */ + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; + + /** + * \brief Type for accessing matrix rows. + */ + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; + + + /** + * \brief Helper type for getting self type or its modifications. 
+ */ + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = TridiagonalMatrix< _Real, _Device, _Index >; + + static constexpr ElementsOrganization getOrganization() { return Organization; }; + + /** + * \brief Constructor with no parameters. + */ + TridiagonalMatrix(); + + /** + * \brief Constructor with matrix dimensions. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + */ + TridiagonalMatrix( const IndexType rows, const IndexType columns ); + + /** + * \brief Constructor with matrix dimensions, diagonals offsets and matrix elements. + * + * The number of matrix rows is deduced from the size of the initializer list \e data. + * + * \tparam ListReal is type used in the initializer list defining matrix elements values. + * + * \param columns is number of matrix columns. + * \param data is initializer list holding matrix elements. The size of the outer list + * defines the number of matrix rows. Each inner list defines values of each sub-diagonal + * and so its size should be lower or equal to three. Values + * of sub-diagonals which do not fit to given row are omitted. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_Constructor_init_list_1.cpp + * \par Output + * \include TridiagonalMatrixExample_Constructor_init_list_1.out + */ + template< typename ListReal > + TridiagonalMatrix( const IndexType columns, + const std::initializer_list< std::initializer_list< ListReal > >& data ); + + /** + * \brief Copy constructor. + * + * \param matrix is an input matrix. + */ + TridiagonalMatrix( const TridiagonalMatrix& matrix ) = default; + + /** + * \brief Move constructor. + * + * \param matrix is an input matrix. + */ + TridiagonalMatrix( TridiagonalMatrix&& matrix ) = default; + + /** + * \brief Returns a modifiable view of the tridiagonal matrix. + * + * See \ref TridiagonalMatrixView. + * + * \return tridiagonal matrix view. 
+ */ + ViewType getView() const; // TODO: remove const + + /** + * \brief Returns a non-modifiable view of the tridiagonal matrix. + * + * See \ref TridiagonalMatrixView. + * + * \return tridiagonal matrix view. + */ + ConstViewType getConstView() const; + + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::TridiagonalMatrix< RealType, [any_device], IndexType, ElementsOrganization, [any_allocator] >`. + * + * \return \ref String with the serialization type. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp + * \par Output + * \include TridiagonalMatrixExample_getSerializationType.out + */ + static String getSerializationType(); + + /** + * \brief Returns string with serialization type. + * + * See \ref TridiagonalMatrix::getSerializationType. + * + * \return \e String with the serialization type. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getSerializationType.cpp + * \par Output + * \include TridiagonalMatrixExample_getSerializationType.out + */ + virtual String getSerializationTypeVirtual() const; + + /** + * \brief Set matrix dimensions. + * + * \param rows is number of matrix rows. + * \param columns is number of matrix columns. + */ + void setDimensions( const IndexType rows, + const IndexType columns ); + + /** + * \brief This method is for compatibility with \ref SparseMatrix. + * + * It checks if the number of matrix diagonals is compatible with + * required number of non-zero matrix elements in each row. If not + * exception is thrown. + * + * \tparam RowCapacitiesVector is vector-like container type for holding required + * row capacities. + * + * \param rowCapacities is vector-like container holding required row capacities. + */ + template< typename RowCapacitiesVector > + void setRowCapacities( const RowCapacitiesVector& rowCapacities ); + + /** + * \brief Set matrix elements from an initializer list. 
+ * + * \tparam ListReal is data type of the initializer list. + * + * \param data is initializer list holding matrix elements. The size of the outer list + * defines the number of matrix rows. Each inner list defines values of each sub-diagonal + * and so its size should be lower or equal to three. Values + * of sub-diagonals which do not fit to given row are omitted. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElements.cpp + * \par Output + * \include TridiagonalMatrixExample_setElements.out + */ + template< typename ListReal > + void setElements( const std::initializer_list< std::initializer_list< ListReal > >& data ); + + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getCompressedRowLengths.cpp + * \par Output + * \include TridiagonalMatrixExample_getCompressedRowLengths.out + */ + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + //[[deprecated]] + //IndexType getRowLength( const IndexType row ) const; + + //IndexType getMaxRowLength() const; + + /** + * \brief Setup the matrix dimensions and diagonals offsets based on another tridiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * + * \param matrix is the source matrix. 
+ */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_ > + void setLike( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ); + + /** + * \brief Returns number of non-zero matrix elements. + * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Resets the matrix to zero dimensions. + */ + void reset(); + + /** + * \brief Comparison operator with another tridiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * + * \return \e true if both matrices are identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_ > + bool operator == ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; + + /** + * \brief Comparison operator with another tridiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * \tparam RealAllocator_ is \e RealAllocator of the source matrix. + * + * \param matrix is the source matrix. + * + * \return \e true if both matrices are NOT identical and \e false otherwise. 
+ */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_ > + bool operator != ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; + + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getRow.cpp + * \par Output + * \include TridiagonalMatrixExample_getRow.out + * + * See \ref TridiagonalMatrixRowView. + */ + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getConstRow.cpp + * \par Output + * \include TridiagonalMatrixExample_getConstRow.out + * + * See \ref TridiagonalMatrixRowView. + */ + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + /** + * \brief Set all matrix elements to given value. + * + * \param value is the new value of all matrix elements. + */ + void setValue( const RealType& value ); + + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. 
+ * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_setElement.cpp + * \par Output + * \include TridiagonalMatrixExample_setElement.out + */ + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + /** + * \brief Add element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see. \ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is columns index of the element. + * \param value is the value the element will be set to. + * \param thisElementTriplicator is multiplicator the original matrix element + * value is multiplied by before addition of given \e value. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_addElement.cpp + * \par Output + * \include TridiagonalMatrixExample_addElement.out + * + */ + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementTriplicator = 1.0 ); + + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. 
If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_getElement.cpp + * \par Output + * \include TridiagonalMatrixExample_getElement.out + */ + RealType getElement( const IndexType row, + const IndexType column ) const; + + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp + * \par Output + * \include TridiagonalMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on matrix rows of constant matrix instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_rowsReduction.cpp + * \par Output + * \include TridiagonalMatrixExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType begin, IndexType end, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. 
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp + * \par Output + * \include TridiagonalMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows of constant matrix instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep in an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_allRowsReduction.cpp + * \par Output + * \include TridiagonalMatrixExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + /** + * \brief Method for iteration over matrix rows for constant instances. 
+ * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have the form + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp + * \par Output + * \include TridiagonalMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ) const; + + /** + * \brief Method for iteration over matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have the form + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. 
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forRows.cpp + * \par Output + * \include TridiagonalMatrixExample_forRows.out + */ + template< typename Function > + void forRows( IndexType begin, IndexType end, Function& function ); + + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp + * \par Output + * \include TridiagonalMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ) const; + + /** + * \brief Method for iteration over all matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It is should have form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param begin defines beginning of the range [begin,end) of rows to be processed. + * \param end defines ending of the range [begin,end) of rows to be processed. 
+ * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixExample_forAllRows.cpp + * \par Output + * \include TridiagonalMatrixExample_forAllRows.out + */ + template< typename Function > + void forAllRows( Function& function ); + + /*template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const;*/ + + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixTriplicator * ( * this ) * inVector + outVectorTriplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixTriplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorTriplicator is a factor by which the outVector is multiplied before being added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default. 
+ */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixTriplicator = 1.0, + const RealType outVectorTriplicator = 0.0, + const IndexType begin = 0, + IndexType end = 0 ) const; + + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void addMatrix( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, + const RealType& matrixTriplicator = 1.0, + const RealType& thisMatrixTriplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const TridiagonalMatrix< Real2, Device, Index2 >& matrix, + const RealType& matrixTriplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + /** + * \brief Assignment of exactly the same matrix type. + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. + */ + TridiagonalMatrix& operator=( const TridiagonalMatrix& matrix ); + + /** + * \brief Assignment of another tridiagonal matrix + * + * \param matrix is input matrix for the assignment. + * \return reference to this matrix. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_, + typename RealAllocator_ > + TridiagonalMatrix& operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ); + + /** + * \brief Method for saving the matrix to a file. + * + * \param file is the output file. + */ + void save( File& file ) const; + + /** + * \brief Method for loading the matrix from a file. + * + * \param file is the input file. + */ + void load( File& file ); + + /** + * \brief Method for saving the matrix to the file with given filename. 
+ * + * \param fileName is name of the file. + */ + void save( const String& fileName ) const; + + /** + * \brief Method for loading the matrix from the file with given filename. + * + * \param fileName is name of the file. + */ + void load( const String& fileName ); + + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. + */ + void print( std::ostream& str ) const; + + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return constant reference to the indexer. + */ + const IndexerType& getIndexer() const; + + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return non-constant reference to the indexer. + */ + IndexerType& getIndexer(); + + /** + * \brief Returns padding index denoting padding zero elements. + * + * These elements are used for efficient data alignment in memory. + * + * \return value of the padding index. + */ + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; + + IndexerType indexer; + + ViewType view; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/TridiagonalMatrix.hpp similarity index 65% rename from src/TNL/Matrices/Tridiagonal.hpp rename to src/TNL/Matrices/TridiagonalMatrix.hpp index 8a1804e9d36cff5052a15f7320f1f607927378e6..1d522e40d885b61a43f249595580b48773a7e254 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/TridiagonalMatrix.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Tridiagonal.hpp - description + TridiagonalMatrix.hpp - description ------------------- begin : Nov 30, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -12,22 +12,19 @@ #include #include -#include +#include #include namespace TNL { namespace Matrices { -template< 
typename Device > -class TridiagonalDeviceDependentCode; - template< typename Real, typename Device, typename Index, ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -Tridiagonal() +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix() { } @@ -36,8 +33,8 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -Tridiagonal( const IndexType rows, const IndexType columns ) +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix( const IndexType rows, const IndexType columns ) { this->setDimensions( rows, columns ); } @@ -47,25 +44,27 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getView() const -> ViewType + template< typename ListReal > +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix( const IndexType columns, + const std::initializer_list< std::initializer_list< ListReal > >& data ) { - // TODO: fix when getConstView works - return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer ); + this->setDimensions( data.size(), columns ); + this->setElements( data ); } -/*template< typename Real, +template< typename Real, typename Device, typename Index, ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getConstView() const -> ConstViewType +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +getView() const -> ViewType { - return ConstViewType( this->values.getConstView(), indexer ); -}*/ + // TODO: fix when getConstView works + return ViewType( const_cast< TridiagonalMatrix* >( this )->values.getView(), indexer ); 
+} template< typename Real, typename Device, @@ -73,10 +72,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > String -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getSerializationType() { - return String( "Matrices::Tridiagonal< " ) + + return String( "Matrices::TridiagonalMatrix< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + ( Organization ? "true" : "false" ) + ", [any_allocator] >"; @@ -88,7 +87,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > String -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -100,7 +99,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); @@ -115,24 +114,24 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > - // template< typename Vector > + template< typename RowCapacitiesVector > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +setRowCapacities( const RowCapacitiesVector& rowCapacities ) { - if( max( rowLengths ) > 3 ) + if( max( rowCapacities ) > 3 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); - if( rowLengths.getElement( 0 ) > 2 ) + if( rowCapacities.getElement( 0 ) > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); if( this->getRows() > this->getColumns() ) - if( rowLengths.getElement( this->getRows()-1 ) > 1 ) + if( rowCapacities.getElement( this->getRows()-1 ) > 1 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); if( this->getRows() == this->getColumns() ) - if( rowLengths.getElement( this->getRows()-1 ) > 2 ) + if( rowCapacities.getElement( this->getRows()-1 ) > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); if( this->getRows() < this->getColumns() ) - if( rowLengths.getElement( this->getRows()-1 ) > 3 ) + if( rowCapacities.getElement( this->getRows()-1 ) > 3 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); } @@ -141,36 +140,46 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > - template< typename Vector > + template< typename ListReal > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getCompressedRowLengths( Vector& rowLengths ) const +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +setElements( const std::initializer_list< std::initializer_list< ListReal > >& data ) { - return this->view.getCompressedRowLengths( rowLengths ); + if( std::is_same< DeviceType, Devices::Host >::value ) + { + this->getValues() = 0.0; + auto row_it = data.begin(); + for( size_t rowIdx = 0; rowIdx < data.size(); rowIdx++ ) + { + auto data_it = row_it->begin(); + IndexType i = 0; + while( data_it != row_it->end() ) + this->getRow( rowIdx ).setElement( i++, *data_it++ ); + row_it ++; + } + } + else + { + TridiagonalMatrix< Real, Devices::Host, Index, Organization > hostMatrix( + this->getRows(), + this->getColumns() ); 
+ hostMatrix.setElements( data ); + *this = hostMatrix; + } } -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization, - typename RealAllocator > -Index -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getRowLength( const IndexType row ) const -{ - return this->view.getRowLength( row ); -} template< typename Real, typename Device, typename Index, ElementsOrganization Organization, typename RealAllocator > -Index -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getMaxRowLength() const + template< typename Vector > +void +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const { - return this->view.getMaxRowLength(); + return this->view.getCompressedRowLengths( rowLengths ); } template< typename Real, @@ -180,8 +189,8 @@ template< typename Real, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ) +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +setLike( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ) { this->setDimensions( m.getRows(), m.getColumns() ); } @@ -192,10 +201,10 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > Index -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -getNumberOfNonzeroMatrixElements() const +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +getNonzeroElementsCount() const { - return this->view.getNumberOfNonzeroMatrixElements(); + return this->view.getNonzeroElementsCount(); } template< typename Real, @@ -204,7 +213,7 @@ template< typename Real, ElementsOrganization Organization, typename 
RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -217,14 +226,14 @@ template< typename Real, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator == ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { if( Organization == Organization_ ) return this->values == matrix.values; else { - TNL_ASSERT( false, "TODO" ); + TNL_ASSERT_TRUE( false, "TODO" ); } } @@ -235,8 +244,8 @@ template< typename Real, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator != ( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { return ! 
this->operator==( matrix ); } @@ -247,7 +256,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: setValue( const RealType& v ) { this->view.setValue( v ); @@ -260,7 +269,7 @@ template< typename Real, typename RealAllocator > __cuda_callable__ auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); @@ -273,7 +282,7 @@ template< typename Real, typename RealAllocator > __cuda_callable__ auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); @@ -285,7 +294,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { this->view.setElement( row, column, value ); @@ -297,7 +306,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -312,7 +321,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > Real -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getElement( const IndexType row, const IndexType column ) 
const { return this->view.getElement( row, column ); @@ -325,7 +334,7 @@ template< typename Real, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); @@ -338,12 +347,38 @@ template< typename Real, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + template< typename Real, typename Device, typename Index, @@ 
-351,7 +386,7 @@ template< typename Real, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); @@ -364,7 +399,7 @@ template< typename Real, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, last, function ); @@ -377,7 +412,7 @@ template< typename Real, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) const { this->view.forRows( 0, this->getRows(), function ); @@ -390,7 +425,7 @@ template< typename Real, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) { this->view.forRows( 0, this->getRows(), function ); @@ -401,27 +436,17 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -template< typename Vector > -__cuda_callable__ -typename Vector::RealType -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ - return this->view.rowVectorProduct(); -} - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization, - typename RealAllocator > - template< typename InVector, - typename OutVector > + template< typename InVector, typename OutVector > 
void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator, + const IndexType begin, + IndexType end ) const { - this->view.vectorProduct( inVector, outVector ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, begin, end ); } template< typename Real, @@ -431,8 +456,8 @@ template< typename Real, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +addMatrix( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -444,8 +469,8 @@ template< typename Real, typename Real2, typename Index, typename Index2 > -__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, - Tridiagonal< Real, Devices::Cuda, Index >* outMatrix, +__global__ void TridiagonalMatrixTranspositionCudaKernel( const TridiagonalMatrix< Real2, Devices::Cuda, Index2 >* inMatrix, + TridiagonalMatrix< Real, Devices::Cuda, Index >* outMatrix, const Real matrixMultiplicator, const Index gridIdx ) { @@ -473,7 +498,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, 
Index2 >& matrix, +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::getTransposition( const TridiagonalMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), @@ -493,8 +518,8 @@ void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTranspo if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - Tridiagonal* kernel_this = Cuda::passToDevice( *this ); - typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; + TridiagonalMatrix* kernel_this = Cuda::passToDevice( *this ); + typedef TridiagonalMatrix< Real2, Device, Index2 > InMatrixType; InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); @@ -503,7 +528,7 @@ void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTranspo { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + TridiagonalMatrixTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_inMatrix, kernel_this, matrixMultiplicator, @@ -523,7 +548,7 @@ template< typename Real, typename RealAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b, +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -543,8 +568,8 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, Organization, RealAllocator >& -Tridiagonal< Real, Device, Index, Organization, RealAllocator >::operator=( const Tridiagonal& matrix ) 
+TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >& +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::operator=( const TridiagonalMatrix& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -558,9 +583,9 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > -Tridiagonal< Real, Device, Index, Organization, RealAllocator >& -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: -operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >& +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator=( const TridiagonalMatrix< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -575,17 +600,17 @@ operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocat if( std::is_same< Device, Device_ >::value ) { const auto matrix_view = matrix.getView(); - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; this->forAllRows( f ); } else { - Tridiagonal< Real, Device, Index, Organization_ > auxMatrix; + TridiagonalMatrix< Real, Device, Index, Organization_ > auxMatrix; auxMatrix = matrix; const auto matrix_view = auxMatrix.getView(); - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& 
column, Real& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; this->forAllRows( f ); @@ -599,7 +624,7 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); } @@ -609,7 +634,7 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( File& file ) +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); this->indexer.setDimensions( this->getRows(), this->getColumns() ); @@ -621,7 +646,7 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::save( const String& fileName ) const +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::save( const String& fileName ) const { Object::save( fileName ); } @@ -631,7 +656,7 @@ template< typename Real, typename Index, ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName ) +void TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } @@ -642,7 +667,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, Organization, 
RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: print( std::ostream& str ) const { this->view.print( str ); @@ -654,7 +679,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -666,7 +691,7 @@ template< typename Real, ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getIndexer() -> IndexerType& { return this->indexer; @@ -679,12 +704,10 @@ template< typename Real, typename RealAllocator > __cuda_callable__ Index -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getElementIndex( const IndexType row, const IndexType column ) const { - IndexType localIdx = column - row; - if( row > 0 ) - localIdx++; + IndexType localIdx = column - row + 1; TNL_ASSERT_GE( localIdx, 0, "" ); TNL_ASSERT_LT( localIdx, 3, "" ); @@ -699,115 +722,11 @@ template< typename Real, typename RealAllocator > __cuda_callable__ Index -Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +TridiagonalMatrix< Real, Device, Index, Organization, RealAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); } -/* -template<> -class TridiagonalDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return 2*row + column; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const 
Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ 1 ]; - Index i = 3 * row; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ]; - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ] + - vector[ row + 1 ] * values[ i + 1 ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class TridiagonalDeviceDependentCode< Devices::Cuda > -{ - public: - - typedef Devices::Cuda Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return ( column - row + 1 )*rows + row - 1; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ rows - 1 ]; - Index i = row - 1; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ]; - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ] + - vector[ row + 1 ] * values[ i + 2*rows ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix, - const 
InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; - */ - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 9496136dcf465cbb2a68c8fb37d0508ad7e10aac..321b0fab76db88c3f404e8188de08225afcf2de6 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -19,6 +19,20 @@ namespace TNL { namespace Matrices { +/** + * \brief Implementation of sparse tridiagonal matrix. + * + * It serves as an accessor to \ref TridiagonalMatrix for example when passing the + * matrix to lambda functions. TridiagonalMatrix view can also be created in CUDA kernels. + * + * See \ref TridiagonalMatrix for more details. + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. + */ template< typename Real = double, typename Device = Devices::Host, typename Index = int, @@ -26,102 +40,510 @@ template< typename Real = double, class TridiagonalMatrixView : public MatrixView< Real, Device, Index > { public: + + + // Supporting types - they are not important for the user + using BaseType = MatrixView< Real, Device, Index >; + using ValuesViewType = typename BaseType::ValuesView; + using IndexerType = details::TridiagonalMatrixIndexer< Index, Organization >; + + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing.
+ */ using IndexType = Index; - using BaseType = MatrixView< Real, Device, Index >; - using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >; - using ValuesViewType = typename BaseType::ValuesView; + + /** + * \brief Type of related matrix view. + */ using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >; + + /** + * \brief Matrix view type for constant instances. + */ using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; - using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; - // TODO: remove this - it is here only for compatibility with original matrix implementation - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + /** + * \brief Type for accessing matrix rows. + */ + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; + /** + * \brief Helper type for getting self type or its modifications. + */ template< typename _Real = Real, typename _Device = Device, typename _Index = Index, ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > using Self = TridiagonalMatrixView< _Real, _Device, _Index, Organization_ >; + /** + * \brief Constructor with no parameters. + */ + __cuda_callable__ TridiagonalMatrixView(); + /** + * \brief Constructor with all necessary data and views. + * + * \param values is a vector view with matrix elements values + * \param indexer is an indexer of matrix elements + */ + __cuda_callable__ TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ); + /** + * \brief Copy constructor. + * + * \param view is an input tridiagonal matrix view.
+ */ + __cuda_callable__ + TridiagonalMatrixView( const TridiagonalMatrixView& view ) = default; + + /** + * \brief Move constructor. + * + * \param view is an input tridiagonal matrix view. + */ + __cuda_callable__ + TridiagonalMatrixView( TridiagonalMatrixView&& view ) = default; + + /** + * \brief Returns a modifiable view of the tridiagonal matrix. + * + * \return tridiagonal matrix view. + */ ViewType getView(); + /** + * \brief Returns a non-modifiable view of the tridiagonal matrix. + * + * \return tridiagonal matrix view. + */ ConstViewType getConstView() const; + /** + * \brief Returns string with serialization type. + * + * The string has a form `Matrices::TridiagonalMatrix< RealType, [any_device], IndexType, Organization, [any_allocator] >`. + * + * See \ref TridiagonalMatrix::getSerializationType. + * + * \return \ref String with the serialization type. + */ static String getSerializationType(); + /** + * \brief Returns string with serialization type. + * + * See \ref TridiagonalMatrix::getSerializationType. + * + * \return \ref String with the serialization type. + */ virtual String getSerializationTypeVirtual() const; + /** + * \brief Computes number of non-zeros in each row. + * + * \param rowLengths is a vector into which the number of non-zeros in each row + * will be stored.
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getCompressedRowLengths.cpp + * \par Output + * \include TridiagonalMatrixViewExample_getCompressedRowLengths.out + */ template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; - - IndexType getMaxRowLength() const; - - IndexType getNumberOfNonzeroMatrixElements() const; - - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + //[[deprecated]] + //IndexType getRowLength( const IndexType row ) const; + + //IndexType getMaxRowLength() const; + + /** + * \brief Returns number of non-zero matrix elements. + * + * This method really counts the non-zero matrix elements and so + * it returns zero for matrix having all allocated elements set to zero. + * + * \return number of non-zero matrix elements. + */ + IndexType getNonzeroElementsCount() const; + + /** + * \brief Comparison operator with another tridiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix. + * \tparam Organization_ is \e Organization of the source matrix. + * + * \return \e true if both matrices are identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_ > bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + /** + * \brief Comparison operator with another tridiagonal matrix. + * + * \tparam Real_ is \e Real type of the source matrix. + * \tparam Device_ is \e Device type of the source matrix. + * \tparam Index_ is \e Index type of the source matrix.
+ * \tparam Organization_ is \e Organization of the source matrix. + * + * \param matrix is the source matrix. + * + * \return \e true if both matrices are NOT identical and \e false otherwise. + */ + template< typename Real_, + typename Device_, + typename Index_, + ElementsOrganization Organization_ > bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; + /** + * \brief Non-constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getRow.cpp + * \par Output + * \include TridiagonalMatrixViewExample_getRow.out + * + * See \ref TridiagonalMatrixRowView. + */ __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + /** + * \brief Constant getter of simple structure for accessing given matrix row. + * + * \param rowIdx is matrix row index. + * + * \return RowView for accessing given matrix row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getConstRow.cpp + * \par Output + * \include TridiagonalMatrixViewExample_getConstRow.out + * + * See \ref TridiagonalMatrixRowView. + */ __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; + /** + * \brief Set all matrix elements to given value. + * + * \param v is the new value of all matrix elements. + */ void setValue( const RealType& v ); + /** + * \brief Sets element at given \e row and \e column to given \e value. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see
\ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value the element will be set to. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_setElement.cpp + * \par Output + * \include TridiagonalMatrixViewExample_setElement.out + */ + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + /** + * \brief Adds given \e value to the element at given \e row and \e column. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. + * The call may fail if the matrix row capacity is exhausted. + * + * \param row is row index of the element. + * \param column is column index of the element. + * \param value is the value to be added to the element. + * \param thisElementMultiplicator is multiplicator the original matrix element + * value is multiplied by before addition of given \e value.
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_addElement.cpp + * \par Output + * \include TridiagonalMatrixViewExample_addElement.out + * + */ + __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + /** + * \brief Returns value of matrix element at position given by its row and column index. + * + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated on a GPU device and + * this method is called from CPU, it transfers values of each matrix element separately and so the + * performance is very low. For higher performance see \ref TridiagonalMatrix::getRow + * or \ref TridiagonalMatrix::forRows and \ref TridiagonalMatrix::forAllRows. + * + * \param row is a row index of the matrix element. + * \param column is a column index of the matrix element. + * + * \return value of given matrix element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_getElement.cpp + * \par Output + * \include TridiagonalMatrixViewExample_getElement.out + * + */ + __cuda_callable__ RealType getElement( const IndexType row, const IndexType column ) const; + /** + * \brief Method for performing general reduction on matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row.
+ * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp + * \par Output + * \include TridiagonalMatrixViewExample_rowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for performing general reduction on matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction.
+ * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_rowsReduction.cpp + * \par Output + * \include TridiagonalMatrixViewExample_rowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for performing general reduction on all matrix rows for constant instances. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element.
+ * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include TridiagonalMatrixViewExample_allRowsReduction.out + */ template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + /** + * \brief Method for performing general reduction on all matrix rows. + * + * \tparam Fetch is a type of lambda function for data fetch declared as + * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. + * The return type of this lambda can be any non void. + * \tparam Reduce is a type of lambda function for reduction declared as + * `reduce( const FetchValue& v1, const FetchValue& v2 ) -> FetchValue`. + * \tparam Keep is a type of lambda function for storing results of reduction in each row. + * It is declared as `keep( const IndexType rowIdx, const double& value )`. + * \tparam FetchValue is type returned by the Fetch lambda function. + * + * \param fetch is an instance of lambda function for data fetch. + * \param reduce is an instance of lambda function for reduction. + * \param keep is an instance of lambda function for storing results. + * \param zero is zero of given reduction operation also known as idempotent element. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_allRowsReduction.cpp + * \par Output + * \include TridiagonalMatrixViewExample_allRowsReduction.out + */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ); + + /** + * \brief Method for iteration over all matrix rows for constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements.
+ * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp + * \par Output + * \include TridiagonalMatrixViewExample_forRows.out + */ template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; + /** + * \brief Method for iteration over all matrix rows for non-constant instances. + * + * \tparam Function is type of lambda function that will operate on matrix elements. + * It should have a form like + * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can + * be interrupted. + * + * \param first defines beginning of the range [first,last) of rows to be processed. + * \param last defines ending of the range [first,last) of rows to be processed. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forRows.cpp + * \par Output + * \include TridiagonalMatrixViewExample_forRows.out + */ template< typename Function > void forRows( IndexType first, IndexType last, Function& function ); + /** + * \brief This method calls \e forRows for all matrix rows (for constant instances).
+ * + * See \ref TridiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp + * \par Output + * \include TridiagonalMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ) const; + /** + * \brief This method calls \e forRows for all matrix rows. + * + * See \ref TridiagonalMatrix::forRows. + * + * \tparam Function is a type of lambda function that will operate on matrix elements. + * \param function is an instance of the lambda function to be called in each row. + * + * \par Example + * \include Matrices/TridiagonalMatrix/TridiagonalMatrixViewExample_forAllRows.cpp + * \par Output + * \include TridiagonalMatrixViewExample_forAllRows.out + */ template< typename Function > void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - + /** + * \brief Computes product of matrix and vector. + * + * More precisely, it computes: + * + * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector` + * + * \tparam InVector is type of input vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * \tparam OutVector is type of output vector. It can be \ref Vector, + * \ref VectorView, \ref Array, \ref ArrayView or similar container. + * + * \param inVector is input vector. + * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added + * to the result of matrix-vector product. It is zero by default.
+ * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is the number of matrix rows by default. + */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0, + const IndexType begin = 0, + IndexType end = 0 ) const; template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, @@ -139,18 +561,58 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; + /** + * \brief Assignment of exactly the same matrix type. + * + * \param view is input matrix view for the assignment. + * \return reference to this matrix. + */ + TridiagonalMatrixView& operator=( const TridiagonalMatrixView& view ); + + /** + * \brief Method for saving the matrix to a file. + * + * \param file is the output file. + */ void save( File& file ) const; + /** + * \brief Method for saving the matrix to the file with given filename. + * + * \param fileName is name of the file. + */ void save( const String& fileName ) const; + /** + * \brief Method for printing the matrix to output stream. + * + * \param str is the output stream. + */ void print( std::ostream& str ) const; + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return constant reference to the indexer. + */ __cuda_callable__ const IndexerType& getIndexer() const; + /** + * \brief This method returns matrix elements indexer used by this matrix. + * + * \return non-constant reference to the indexer.
+ */ __cuda_callable__ IndexerType& getIndexer(); + /** + * \brief Returns padding index denoting padding zero elements. + * + * These elements are used for efficient data alignment in memory. + * + * \return value of the padding index. + */ __cuda_callable__ IndexType getPaddingIndex() const; diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 092e63cbc00b7bcea7d42ea1d2e0f84389dce8c1..d63e6dd50821370c7e4e41b4b3ede9c55602ad67 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -66,10 +66,10 @@ String TridiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationType() { - return String( "Matrices::Tridiagonal< " ) + + return String( "Matrices::TridiagonalMatrix< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( Organization ? "true" : "false" ) + ", [any_allocator] >"; + TNL::getSerializationType( Organization ) + ", [any_allocator] >"; } template< typename Real, @@ -107,36 +107,13 @@ getCompressedRowLengths( Vector& rowLengths ) const this->allRowsReduction( fetch, reduce, keep, 0 ); } - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -TridiagonalMatrixView< Real, Device, Index, Organization >:: -getRowLength( const IndexType row ) const -{ - return this->indexer.getRowSize( row ); -} - template< typename Real, typename Device, typename Index, ElementsOrganization Organization > Index TridiagonalMatrixView< Real, Device, Index, Organization >:: -getMaxRowLength() const -{ - return 3; -} - -template< typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -Index -TridiagonalMatrixView< Real, Device, Index, Organization >:: -getNumberOfNonzeroMatrixElements() const +getNonzeroElementsCount() const { const auto values_view = this->values.getConstView(); auto fetch = [=] 
__cuda_callable__ ( const IndexType i ) -> IndexType { @@ -158,7 +135,7 @@ operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ return this->values == matrix.values; else { - TNL_ASSERT( false, "TODO" ); + TNL_ASSERT_TRUE( false, "TODO" ); } } @@ -213,7 +190,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > -void +__cuda_callable__ void TridiagonalMatrixView< Real, Device, Index, Organization >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { @@ -223,9 +200,13 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( false, "Wrong matrix element coordinates tridiagonal matrix." ); +#else std::stringstream msg; msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; throw std::logic_error( msg.str() ); +#endif } this->values.setElement( this->getElementIndex( row, column ), value ); } @@ -234,7 +215,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > -void +__cuda_callable__ void TridiagonalMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, const IndexType column, @@ -247,9 +228,13 @@ addElement( const IndexType row, TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( false, "Wrong matrix element coordinates tridiagonal matrix." 
); +#else std::stringstream msg; msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; throw std::logic_error( msg.str() ); +#endif } const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); @@ -259,7 +244,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > -Real +__cuda_callable__ Real TridiagonalMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const { @@ -290,8 +275,52 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke Real_ sum( zero ); if( rowIdx == 0 ) { - reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + reduce( sum, fetch( 0, 2, values_view[ indexer.getGlobalIndex( 0, 2 ) ] ) ); + keep( 0, sum ); + return; + } + if( rowIdx + 1 < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); + keep( rowIdx, sum ); + return; + } + if( rowIdx < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + keep( rowIdx, sum ); + } + else + { + keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, 
typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, Organization >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) +{ + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); + auto values_view = this->values.getConstView(); + const auto indexer = this->indexer; + const auto zero = zero_; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + Real_ sum( zero ); + if( rowIdx == 0 ) + { + reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + reduce( sum, fetch( 0, 2, values_view[ indexer.getGlobalIndex( 0, 2 ) ] ) ); keep( 0, sum ); return; } @@ -329,6 +358,18 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); } +template< typename Real, + typename Device, + typename Index, + ElementsOrganization Organization > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, Organization >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) +{ + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); +} + template< typename Real, typename Device, typename Index, @@ -344,9 +385,9 @@ forRows( IndexType first, IndexType last, Function& function ) const auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { if( rowIdx == 0 ) { - function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute ); - function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); - } + function( 0, 1, 0, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); + function( 0, 2, 1, values_view[ indexer.getGlobalIndex( 0, 2 ) ], compute ); + } else if( rowIdx + 1 < indexer.getColumns() ) { function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 
0 ) ], compute ); @@ -375,25 +416,26 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); const auto indexer = this->indexer; + bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { if( rowIdx == 0 ) { - function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); - function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); - } + function( 0, 1, 0, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); + function( 0, 2, 1, values_view[ indexer.getGlobalIndex( 0, 2 ) ], compute ); + } else if( rowIdx + 1 < indexer.getColumns() ) { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute ); } else if( rowIdx < indexer.getColumns() ) { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); } else - function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -422,27 +464,20 @@ forAllRows( Function& function ) this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); } -template< 
typename Real, - typename Device, - typename Index, - ElementsOrganization Organization > -template< typename Vector > -__cuda_callable__ -typename Vector::RealType -TridiagonalMatrixView< Real, Device, Index, Organization >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ -} - template< typename Real, typename Device, typename Index, ElementsOrganization Organization > template< typename InVector, typename OutVector > -void +void TridiagonalMatrixView< Real, Device, Index, Organization >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator, + const IndexType begin, + IndexType end ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); @@ -455,10 +490,31 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; }; - auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + auto keeper1 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; }; - this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); + auto keeper2 = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; + }; + if( end == 0 ) + end = this->getRows(); + if( matrixMultiplicator == 1.0 && outVectorMultiplicator == 0.0 ) + this->rowsReduction( begin, end, fetch, reduction, keeper1, ( RealType ) 0.0 ); + else + this->rowsReduction( begin, end, fetch, reduction, keeper2, ( RealType ) 0.0 ); +} + +template< typename Real, + typename Device, + 
typename Index, + ElementsOrganization Organization > +TridiagonalMatrixView< Real, Device, Index, Organization >& +TridiagonalMatrixView< Real, Device, Index, Organization >:: +operator=( const TridiagonalMatrixView& view ) +{ + MatrixView< Real, Device, Index >::operator=( view ); + this->indexer = view.indexer; + return *this; } template< typename Real, @@ -487,13 +543,13 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& const auto matrix_view = matrix; const auto matrixMult = matrixMultiplicator; const auto thisMult = thisMatrixMultiplicator; - auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; - auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; - auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; if( thisMult == 0.0 ) @@ -678,9 +734,7 @@ Index TridiagonalMatrixView< Real, Device, Index, Organization >:: getElementIndex( const IndexType row, const IndexType column ) const { - IndexType localIdx = 
column - row; - if( row > 0 ) - localIdx++; + IndexType localIdx = column - row + 1; TNL_ASSERT_GE( localIdx, 0, "" ); TNL_ASSERT_LT( localIdx, 3, "" ); diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h index 3597c30f7d3eec37ef85b050cb01963e5f34715a..bba7b524e980b9cbe414eddbd18b5e156c806477 100644 --- a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h @@ -21,6 +21,7 @@ class MultidiagonalMatrixIndexer public: using IndexType = Index; + using ConstType = MultidiagonalMatrixIndexer< std::add_const_t< Index >, RowMajorOrder >; static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h index d7a3a429dc00dd194aef5aab2f05f289b9c9215e..8412d3dad815da2672570b6705452b49a86ebc69 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -433,8 +433,8 @@ void tridiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); @@ -483,10 +483,10 @@ void multidiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using MultidiagonalHost = 
TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; - using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; - DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType; + DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 }; const IndexType rows( 10 ), columns( 10 ); MultidiagonalHost hostMatrix( rows, columns, diagonals ); diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp index 87d5e139bfc662cdd487e8fcfa2ce3c64a4b10c2..8d532ae7123b49f592b47a268ffc0bc3230580ab 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -948,7 +948,7 @@ void test_RowsReduction() // Compute number of non-zero elements in rows. 
typename Matrix::RowsCapacitiesType rowLengths( rows ); auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { @@ -963,7 +963,7 @@ void test_RowsReduction() // Compute max norm TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); auto rowSums_view = rowSums.getView(); - auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return abs( value ); }; auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { @@ -1136,62 +1136,4 @@ void test_SaveAndLoad( const char* filename ) EXPECT_EQ( std::remove( filename ), 0 ); } -template< typename Matrix > -void test_Print() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 5x4 sparse matrix: - * - * / 1 1 1 0 \ - * | 0 0 0 1 | - * | 1 1 1 0 | - * | 0 1 1 1 | - * \ 0 0 1 1 / - */ - - const IndexType m_rows = 5; - const IndexType m_cols = 4; - - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, 1 ); - - m.setElement( 1, 3, 1 ); // 1st row - - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, 1 ); - - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, 1 ); - - for( 
IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, 1 ); - - std::stringstream printed; - std::stringstream couted; - - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); - - m.print( std::cout ); //all the std::cout goes to ss - - std::cout.rdbuf(old_buf); //reset - - couted << "Row: 0 -> Col:0->1 Col:1->1 Col:2->1\t\n" - "Row: 1 -> Col:3->1\t\n" - "Row: 2 -> Col:0->1 Col:1->1 Col:2->1\t\n" - "Row: 3 -> Col:1->1 Col:2->1 Col:3->1\t\n" - "Row: 4 -> Col:2->1 Col:3->1\t\n"; - - EXPECT_EQ( printed.str(), couted.str() ); -} - #endif diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h index a853281bef7b1bb9a3cb4985b6a3a53ba519ee45..ab072ab8ad9b4d2ff1237a25301022d424740f96 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -114,14 +114,6 @@ TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest ) test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" ); } - -TYPED_TEST( BinaryMatrixTest_CSR, printTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_Print< CSRMatrixType >(); -} - #endif #include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h index e532c9ff19d154eb86053a2897de7d1d3c9b6ff3..f8cd5f415ddaf07c188e67a7863c6a3930a95e6b 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -125,14 +125,6 @@ TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest ) test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" ); } - -TYPED_TEST( BinaryMatrixTest_Ellpack, printTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_Print< EllpackMatrixType >(); -} - #endif #include "../main.h" diff --git 
a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h index e332bc0109bf0d69242658859835fef72a432626..f58a018ac1685dd7ca1443017ed22d2ada1868a0 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -126,13 +126,6 @@ TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest ) test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" ); } -TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_Print< SlicedEllpackMatrixType >(); -} - #endif #include "../main.h" diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h index d311d774d7d06003cb7db510d8115f4eb977540b..0d502cf23d2697172c61a2bc79cc04d3dac8a151 100644 --- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -400,8 +400,8 @@ void tridiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); @@ -449,10 +449,10 @@ void multidiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using MultidiagonalHost = 
TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; - using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; - DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType; + DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 }; const IndexType rows( 10 ), columns( 10 ); MultidiagonalHost hostMatrix( rows, columns, diagonals ); diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index ceed58546e34fe6e91a421d19d142eae23d89784..627bcdf0f8a27779d92d97945f088232ba51f232 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -36,14 +36,16 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; void test_GetSerializationType() { using namespace TNL::Containers::Segments; - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, 
[any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); + std::cerr << TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() << std::endl; + std::cerr << TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() << std::endl; + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, RowMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, RowMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, RowMajorOrder >" ) ); + EXPECT_EQ( ( 
TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, ColumnMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, ColumnMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, ColumnMajorOrder >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, ColumnMajorOrder >" ) ); } template< typename Matrix > @@ -166,7 +168,7 @@ void test_GetCompressedRowLengths() } template< typename Matrix > -void test_GetElementsCount() +void test_GetAllocatedElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -179,7 +181,7 @@ void test_GetElementsCount() m.reset(); m.setDimensions( rows, cols ); - EXPECT_EQ( m.getElementsCount(), 42 ); + EXPECT_EQ( m.getAllocatedElementsCount(), 42 ); } template< typename Matrix > @@ -1365,11 +1367,11 @@ TYPED_TEST( MatrixTest, setElementsTest ) test_SetElements< MatrixType >(); } -TYPED_TEST( MatrixTest, getElementsCountTest ) +TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; - test_GetElementsCount< MatrixType >(); + test_GetAllocatedElementsCount< MatrixType >(); } TYPED_TEST( MatrixTest, getNonzeroElementsCountTest ) diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.h b/src/UnitTests/Matrices/LambdaMatrixTest.h index 07d1f336c4d5cc76c74bf19711fc4d2b68d3684b..cc2893d9bc0cc33d6385520a2b7d34e25c0b9907 100644 --- a/src/UnitTests/Matrices/LambdaMatrixTest.h +++ 
b/src/UnitTests/Matrices/LambdaMatrixTest.h @@ -102,14 +102,6 @@ TYPED_TEST( LambdaMatrixTest, rowsReduction ) test_RowsReduction< LambdaMatrixParametersType >(); } - -TYPED_TEST( LambdaMatrixTest, printTest ) -{ - using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; - - test_Print< LambdaMatrixParametersType >(); -} - #endif #include "../main.h" diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.hpp b/src/UnitTests/Matrices/LambdaMatrixTest.hpp index 23963c11906431fa6f80926cb21c7d5d12913c51..256a84bf7fba0069a9130c92afed76e6249fe547 100644 --- a/src/UnitTests/Matrices/LambdaMatrixTest.hpp +++ b/src/UnitTests/Matrices/LambdaMatrixTest.hpp @@ -94,10 +94,10 @@ void test_GetCompressedRowLengths() MatrixType m( size, size, matrixElements, rowLengths ); TNL::Containers::Vector< IndexType > correctRowLengths{ 1, 3, 3, 3, 1 }; - TNL::Containers::Vector< IndexType > rowLengthsVector; + TNL::Containers::Vector< IndexType, DeviceType > rowLengthsVector; m.getCompressedRowLengths( rowLengthsVector ); for( int i = 0; i < size; i++ ) - EXPECT_EQ( correctRowLengths[ i ], rowLengthsVector[ i ] ); + EXPECT_EQ( correctRowLengths.getElement( i ), rowLengthsVector.getElement( i ) ); } template< typename Matrix > @@ -233,11 +233,11 @@ void test_RowsReduction() TNL::Containers::Vector< RealType, DeviceType, IndexType > v( size, -1.0 ); auto vView = v.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType columnIdx, const RealType& value ) mutable -> RealType { return value; }; - auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { - sum += value; + auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) -> RealType { + return sum + value; }; auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { vView[ row ] = value; @@ -251,56 
+251,4 @@ void test_RowsReduction() EXPECT_EQ( v.getElement( 4 ), 1.0 ); } -template< typename Matrix > -void test_Print() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - IndexType size = 5; - auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { - if( rowIdx == 0 || rowIdx == size - 1 ) - return 1; - return 3; - }; - - auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { - if( rowIdx == 0 || rowIdx == size -1 ) - { - columnIdx = rowIdx; - value = 1.0; - } - else - { - columnIdx = rowIdx + localIdx - 1; - value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; - } - }; - - using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); - - MatrixType m( size, size, matrixElements, rowLengths ); - - std::stringstream printed; - std::stringstream couted; - - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); - - - m.print( std::cout ); //all the std::cout goes to ss - - std::cout.rdbuf(old_buf); //reset - - couted << "Row: 0 -> Col:0->1\t\n" - "Row: 1 -> Col:0->1 Col:1->-2 Col:2->1\t\n" - "Row: 2 -> Col:1->1 Col:2->-2 Col:3->1\t\n" - "Row: 3 -> Col:2->1 Col:3->-2 Col:4->1\t\n" - "Row: 4 -> Col:4->1\t\n"; - - EXPECT_EQ( printed.str(), couted.str() ); -} - - #endif // HAVE_GTEST diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index 8ee8c7ffb151d3d0f89f03af29941f5378384874..75aeda8245afb49744fc24fe63273993fdd50795 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -11,7 +11,7 @@ #include #include #include -#include 
+#include #include #include @@ -19,11 +19,11 @@ #include #include -using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >; -using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >; +using Multidiagonal_host_float = TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int >; +using Multidiagonal_host_int = TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, int >; -using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >; -using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >; +using Multidiagonal_cuda_float = TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int >; +using Multidiagonal_cuda_int = TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, int >; static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; @@ -35,14 +35,14 @@ static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; void test_GetSerializationType() { using namespace TNL::Containers::Segments; - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], 
[any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, RowMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( 
TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< float, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::MultidiagonalMatrix< int, [any_device], int, ColumnMajorOrder, [any_allocator], [any_allocator] >" ) ); } template< typename Matrix > @@ -51,14 +51,14 @@ void test_SetDimensions() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; const IndexType rows = 9; const IndexType cols = 8; - const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; + const DiagonalsOffsetsType diagonalsOffsets{ -3, -1, 0, 2, 4 }; Matrix m; - m.setDimensions( rows, cols, diagonalsShifts ); + m.setDimensions( rows, cols, diagonalsOffsets ); EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); @@ -71,17 +71,17 @@ void test_SetLike() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType; + using DiagonalsOffsetsType = 
typename Matrix1::DiagonalsOffsetsType; const IndexType rows = 8; const IndexType cols = 7; - const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; + const DiagonalsOffsetsType diagonalsOffsets{ -3, -1, 0, 2, 4 }; Matrix1 m1; - m1.setDimensions( rows + 1, cols + 2, diagonalsShifts ); + m1.setDimensions( rows + 1, cols + 2, diagonalsOffsets ); Matrix2 m2; - m2.setDimensions( rows, cols, diagonalsShifts ); + m2.setDimensions( rows, cols, diagonalsOffsets ); m1.setLike( m2 ); @@ -90,54 +90,64 @@ void test_SetLike() } template< typename Matrix > -void test_GetNonemptyRowsCount() +void test_SetElements() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - /* - * Sets up the following 5x8 matrix: - * - * / 1 0 0 1 0 1 0 0 \ - * | 0 1 0 0 1 0 1 0 | - * | 1 0 1 0 0 1 0 1 | - * | 0 1 0 1 0 0 1 0 | - * \ 0 0 1 0 1 0 0 1 / - */ - Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); - m1.setValue( 1.0 ); - EXPECT_EQ( m1.getNonemptyRowsCount(), 5 ); - - /* - * Sets up the following 5x5 matrix: - * - * / 1 0 0 1 0 \ - * | 0 1 0 0 1 | - * | 1 0 1 0 0 | - * | 0 1 0 1 0 | - * \ 0 0 1 0 1 / - */ - Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); - m2.setValue( 1.0 ); - EXPECT_EQ( m2.getNonemptyRowsCount(), 5 ); - - /* - * Sets up the following 8x5 matrix: - * - * / 1 0 0 1 0 \ - * | 0 1 0 0 1 | - * | 1 0 1 0 0 | - * | 0 1 0 1 0 | - * | 0 0 1 0 1 | - * | 0 0 0 1 0 | - * | 0 0 0 0 1 | - * \ 0 0 0 0 0 / - */ - Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); - m3.setValue( 1.0 ); - EXPECT_EQ( m3.getNonemptyRowsCount(), 7 ); + const int gridSize( 4 ); + const int matrixSize( gridSize * gridSize ); + Matrix matrix( matrixSize, matrixSize, { - gridSize, -1, 0, 1, gridSize } ); + matrix.setElements( { + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + 
{ -1.0, -1.0, 4.0, -1.0, -1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { -1.0, -1.0, 4.0, -1.0, -1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 }, + { 0.0, 0.0, 1.0 } + } ); + + for( int i = 0; i < gridSize; i++ ) + for( int j = 0; j < gridSize; j++ ) + { + const int elementIdx = i * gridSize + j; + if( i == 0 || j == 0 || i == gridSize - 1 || j == gridSize - 1 ) // check matrix elements corresponding to boundary grid nodes + { + for( int k = 0; k < matrixSize; k++ ) + { + if( elementIdx == k ) + EXPECT_EQ( matrix.getElement( elementIdx, k ), 1.0 ); + else + EXPECT_EQ( matrix.getElement( elementIdx, k ), 0.0 ); + } + } + else // check matrix elements corresponding to inner grid nodes + { + for( int k = 0; k < matrixSize; k++ ) + { + if( k == elementIdx - gridSize || + k == elementIdx - 1 || + k == elementIdx + 1 || + k == elementIdx + gridSize ) + EXPECT_EQ( matrix.getElement( elementIdx, k ), -1.0 ); + else if( k == elementIdx ) + EXPECT_EQ( matrix.getElement( elementIdx, k ), 4.0 ); + else + EXPECT_EQ( matrix.getElement( elementIdx, k ), 0.0 ); + } + } + } } template< typename Matrix > @@ -146,7 +156,7 @@ void test_GetCompressedRowLengths() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 8x8 matrix: @@ -164,7 +174,7 @@ void test_GetCompressedRowLengths() const IndexType rows = 8; const IndexType cols = 8; - Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + Matrix m( rows, cols, DiagonalsOffsetsType({ -2, 0, 3, 5 }) ); m.setValue( 1.0 ); m.setElement( 0, 0, 0.0 ); m.setElement( 7, 7, 0.0 ); @@ -182,28 +192,28 @@ void test_GetAllocatedElementsCount() using RealType = typename 
Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; const IndexType rows = 7; const IndexType cols = 6; - Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + Matrix m1( 7, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) ); EXPECT_EQ( m1.getAllocatedElementsCount(), 28 ); - Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + Matrix m2( 8, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) ); EXPECT_EQ( m2.getAllocatedElementsCount(), 32 ); - Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + Matrix m3( 9, 6, DiagonalsOffsetsType( { -2, 0, 3, 5 } ) ); EXPECT_EQ( m3.getAllocatedElementsCount(), 32 ); } template< typename Matrix > -void test_GetNumberOfNonzeroMatrixElements() +void test_GetNonzeroElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 7x6 matrix: @@ -221,11 +231,11 @@ void test_GetNumberOfNonzeroMatrixElements() const IndexType rows = 7; const IndexType cols = 6; - Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + Matrix m( rows, cols, DiagonalsOffsetsType( { -3, 0, 2, 4 } ) ); m.setValue( 1.0 ); m.setElement( 0, 0, 0.0 ); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); + EXPECT_EQ( m.getNonzeroElementsCount(), 15 ); } template< typename Matrix > @@ -234,7 +244,7 @@ void test_Reset() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * 
Sets up the following 5x4 matrix: @@ -248,7 +258,7 @@ void test_Reset() const IndexType rows = 5; const IndexType cols = 4; - Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) ); + Matrix m( rows, cols, DiagonalsOffsetsType( { 0, 1, 2, 4 } ) ); m.reset(); EXPECT_EQ( m.getRows(), 0 ); @@ -261,7 +271,7 @@ void test_SetValue() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 7x6 matrix: @@ -277,7 +287,7 @@ void test_SetValue() const IndexType rows = 7; const IndexType cols = 6; - Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + Matrix m( rows, cols, DiagonalsOffsetsType( { -3, 0, 2, 4 } ) ); m.setValue( 1.0 ); EXPECT_EQ( m.getElement( 0, 0 ), 1 ); @@ -336,7 +346,7 @@ void test_SetElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 5x5 matrix: @@ -349,7 +359,7 @@ void test_SetElement() */ const IndexType rows = 5; const IndexType cols = 5; - DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + DiagonalsOffsetsType diagonals{-3, 0, 1, 4 }; Matrix m( rows, cols, diagonals ); RealType value = 1; @@ -399,7 +409,7 @@ void test_AddElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 5x5 matrix: @@ -412,7 +422,7 @@ void test_AddElement() */ const IndexType rows = 5; const IndexType cols = 5; - 
DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + DiagonalsOffsetsType diagonals{-3, 0, 1, 4 }; Matrix m( rows, cols, diagonals ); RealType value = 1; @@ -520,7 +530,7 @@ void test_SetRow() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 5x7 matrix: @@ -534,7 +544,7 @@ void test_SetRow() const IndexType rows = 5; const IndexType cols = 7; - Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) ); + Matrix m( rows, cols, DiagonalsOffsetsType({ -1, 0, 2, 4 }) ); auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { @@ -597,7 +607,7 @@ void test_AddRow() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 6x5 matrix: @@ -612,7 +622,7 @@ void test_AddRow() const IndexType rows = 6; const IndexType cols = 5; - DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } ); + DiagonalsOffsetsType diagonals( { -2, 0, 1, 2 } ); Matrix m( rows, cols, diagonals ); @@ -736,7 +746,7 @@ void test_VectorProduct() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 5x4 matrix: @@ -749,7 +759,7 @@ void test_VectorProduct() */ const IndexType rows = 5; const IndexType cols = 4; - DiagonalsShiftsType diagonals{ -2, 0, 2 }; + DiagonalsOffsetsType diagonals{ -2, 0, 2 }; Matrix m( rows, cols, diagonals ); @@ -785,8 
+795,8 @@ void test_AddMatrix() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType; - using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType; + using DiagonalsOffsetsType1 = typename Matrix1::DiagonalsOffsetsType; + using DiagonalsOffsetsType2 = typename Matrix2::DiagonalsOffsetsType; /* * Sets up the following 5x4 matrix: @@ -799,8 +809,8 @@ void test_AddMatrix() */ const IndexType rows = 5; const IndexType cols = 4; - DiagonalsShiftsType1 diagonals1; - DiagonalsShiftsType2 diagonals2; + DiagonalsOffsetsType1 diagonals1; + DiagonalsOffsetsType2 diagonals2; Matrix1 m( rows, cols, diagonals1 ); @@ -908,7 +918,7 @@ void test_GetMatrixProduct() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 5x4 matrix: * @@ -920,9 +930,9 @@ void test_GetMatrixProduct() */ const IndexType leftRows = 5; const IndexType leftCols = 4; - DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } ); - Matrix leftMatrix( leftRows, leftCols, diagonalsShifts ); + Matrix leftMatrix( leftRows, leftCols, diagonalsOffsets ); RealType value = 1; for( IndexType i = 0; i < leftRows; i++ ) @@ -959,7 +969,7 @@ void test_GetMatrixProduct() * \ 0 0 0 0 / */ - Matrix mResult( leftRows, rightCols, diagonalsShifts ); + Matrix mResult( leftRows, rightCols, diagonalsOffsets ); mResult.setValue( 0 ); RealType leftMatrixMultiplicator = 1; @@ -1011,7 +1021,7 @@ void test_GetTransposition() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using 
DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 3x2 matrix: * @@ -1021,9 +1031,9 @@ void test_GetTransposition() */ const IndexType rows = 3; const IndexType cols = 2; - DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } ); - Matrix m( rows, cols, diagonalsShifts ); + Matrix m( rows, cols, diagonalsOffsets ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) @@ -1038,7 +1048,7 @@ void test_GetTransposition() * / 0 0 0 \ * \ 0 0 0 / */ - Matrix mTransposed( cols, rows, diagonalsShifts ); + Matrix mTransposed( cols, rows, diagonalsOffsets ); mTransposed.print( std::cout ); @@ -1071,7 +1081,7 @@ void test_PerformSORIteration() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 4x4 matrix: * @@ -1082,9 +1092,9 @@ void test_PerformSORIteration() */ const IndexType rows = 4; const IndexType cols = 4; - DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + DiagonalsOffsetsType diagonalsOffsets( { 0, 1, 2 } ); - Matrix m( rows, cols, diagonalsShifts ); + Matrix m( rows, cols, diagonalsOffsets ); m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0 ); @@ -1147,36 +1157,36 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization(); - using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, 
TNL::Devices::Host, IndexType, organization >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, + using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType, organization >; + using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType, organization == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); - DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } ); - MultidiagonalHost hostMatrix( rows, columns, diagonalsShifts ); + DiagonalsOffsetsType diagonalsOffsets( { -4, -2, 0, 2, 3, 5 } ); + MultidiagonalHost hostMatrix( rows, columns, diagonalsOffsets ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) - if( diagonalsShifts.containsValue( j - i ) ) + if( diagonalsOffsets.containsValue( j - i ) ) hostMatrix.setElement( i, j, i + j ); - Matrix matrix( rows, columns, diagonalsShifts ); + Matrix matrix( rows, columns, diagonalsOffsets ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) - if( diagonalsShifts.containsValue( j - i ) ) + if( diagonalsOffsets.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); #ifdef HAVE_CUDA - MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts ); + MultidiagonalCuda cudaMatrix( rows, columns, diagonalsOffsets ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) - if( diagonalsShifts.containsValue( j - i ) ) + if( diagonalsOffsets.containsValue( j - i ) ) cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; @@ -1184,7 +1194,7 @@ void test_AssignmentOperator() for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) { - if( 
diagonalsShifts.containsValue( j - i ) ) + if( diagonalsOffsets.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); @@ -1199,7 +1209,7 @@ void test_SaveAndLoad() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + using DiagonalsOffsetsType = typename Matrix::DiagonalsOffsetsType; /* * Sets up the following 4x4 matrix: @@ -1211,15 +1221,15 @@ void test_SaveAndLoad() */ const IndexType rows = 4; const IndexType cols = 4; - DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); + DiagonalsOffsetsType diagonalsOffsets( { -1, 0, 1 } ); - Matrix savedMatrix( rows, cols, diagonalsShifts ); + Matrix savedMatrix( rows, cols, diagonalsOffsets ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) { - if( diagonalsShifts.containsValue( j - i ) ) + if( diagonalsOffsets.containsValue( j - i ) ) savedMatrix.setElement( i, j, value ); value++; } @@ -1271,56 +1281,6 @@ void test_SaveAndLoad() EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); } -template< typename Matrix > -void test_Print() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - - /* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 0 0 \ - * | 5 6 7 0 | - * | 0 10 11 12 | - * | 0 0 15 16 | - * \ 0 0 0 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; - DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); - - Matrix m( rows, cols, diagonalsShifts ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++) - for( IndexType j = 0; j < cols; j++) - { - if( abs( i - j ) <= 1 ) - m.setElement( i, j, value ); - value++; - } - - std::stringstream printed; - 
std::stringstream couted; - - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); - - m.print( std::cout ); //all the std::cout goes to ss - - std::cout.rdbuf(old_buf); //reset - couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" - "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" - "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" - "Row: 3 -> Col:2->15\t Col:3->16\t\n" - "Row: 4 -> Col:3->20\t\n"; - - EXPECT_EQ( printed.str(), couted.str() ); -} - // test fixture for typed tests template< typename Matrix > class MatrixTest : public ::testing::Test @@ -1332,31 +1292,31 @@ protected: // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types < - TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, short >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, short >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, short >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >, - TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, int >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >, - TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, long >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, long >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, long >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long > + TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, short >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Host, short >, + TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, short >, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, short >, + TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, int >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Host, int >, + TNL::Matrices::MultidiagonalMatrix< float, 
TNL::Devices::Host, int >, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, int >, + TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Host, long >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Host, long >, + TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Host, long >, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, short >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, short >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, short >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >, - TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, int >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >, - TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, long >, - TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, long >, - TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, long >, - TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, short >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Cuda, short >, + TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, short >, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, short >, + TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, int >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Cuda, int >, + TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, int >, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, int >, + TNL::Matrices::MultidiagonalMatrix< int, TNL::Devices::Cuda, long >, + TNL::Matrices::MultidiagonalMatrix< long, TNL::Devices::Cuda, long >, + TNL::Matrices::MultidiagonalMatrix< float, TNL::Devices::Cuda, long 
>, + TNL::Matrices::MultidiagonalMatrix< double, TNL::Devices::Cuda, long > #endif >; @@ -1381,14 +1341,13 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } -TYPED_TEST( MatrixTest, getNonemptyRowsCountTest ) +TYPED_TEST( MatrixTest, setElements ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNonemptyRowsCount< MatrixType >(); + test_SetElements< MatrixType >(); } - TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1407,7 +1366,7 @@ TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNumberOfNonzeroMatrixElements< MatrixType >(); + test_GetNonzeroElementsCount< MatrixType >(); } TYPED_TEST( MatrixTest, resetTest ) @@ -1480,13 +1439,6 @@ TYPED_TEST( MatrixTest, saveAndLoadTest ) test_SaveAndLoad< MatrixType >(); } -TYPED_TEST( MatrixTest, printTest ) -{ - using MatrixType = typename TestFixture::MatrixType; - - test_Print< MatrixType >(); -} - /*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host ) { bool testRan = false; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 6f8a142a673f6e206d6fa589145706c55aed3ec5..ffcca4b606c9d665c9442dd0488d447a73adef30 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -436,8 +436,8 @@ void tridiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, 
IndexType >; + using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); @@ -486,10 +486,10 @@ void multidiagonalMatrixAssignment() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; - using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; - DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + using MultidiagonalHost = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::MultidiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsOffsetsType = typename MultidiagonalHost::DiagonalsOffsetsType; + DiagonalsOffsetsType diagonals{ -4, -2, 0, 1, 3, 5 }; const IndexType rows( 10 ), columns( 10 ); MultidiagonalHost hostMatrix( rows, columns, diagonals ); diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index a00e696871b4393672cc6c2475fb961f08187b8d..98e779daea91af6f521af04fbc402706eed14d8f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -108,11 +108,4 @@ TYPED_TEST( MatrixTest, saveAndLoadTest ) test_SaveAndLoad< MatrixType >( saveAndLoadFileName ); } - -TYPED_TEST( MatrixTest, printTest ) -{ - using MatrixType = typename TestFixture::MatrixType; - - test_Print< MatrixType >(); -} #endif \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 6d7c6436058cec31efa1fe4e64cc4e27238a67a5..38abdf07e0f105fee9503703578cb846983b5158 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ 
b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -320,7 +320,7 @@ void test_SetLike() } template< typename Matrix > -void test_GetNumberOfNonzeroMatrixElements() +void test_GetNonzeroElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -369,7 +369,7 @@ void test_GetNumberOfNonzeroMatrixElements() for( IndexType i = 0; i < cols; i++ ) m.setElement( j, i, value++ ); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); + EXPECT_EQ( m.getNonzeroElementsCount(), 41 ); } template< typename Matrix > @@ -1374,7 +1374,7 @@ void test_RowsReduction() // Compute number of non-zero elements in rows. typename Matrix::RowsCapacitiesType rowLengths( rows ); auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { @@ -1389,7 +1389,7 @@ void test_RowsReduction() // Compute max norm TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); auto rowSums_view = rowSums.getView(); - auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { return abs( value ); }; auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { @@ -1557,62 +1557,4 @@ void test_SaveAndLoad( const char* filename ) EXPECT_EQ( std::remove( filename ), 0 ); } -template< typename Matrix > -void test_Print() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets 
up the following 5x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * | 0 8 9 10 | - * \ 0 0 11 12 / - */ - - const IndexType m_rows = 5; - const IndexType m_cols = 4; - - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - m.setElement( 1, 3, value++ ); // 1st row - - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - for( IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, value++ ); - - std::stringstream printed; - std::stringstream couted; - - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); - - m.print( std::cout ); //all the std::cout goes to ss - - std::cout.rdbuf(old_buf); //reset - - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" - "Row: 1 -> Col:3->4\t\n" - "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" - "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" - "Row: 4 -> Col:2->11 Col:3->12\t\n"; - - EXPECT_EQ( printed.str(), couted.str() ); -} - #endif diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index 02fd8c585366f4da12d1218a28adca717dd2cdf2..3c40937851780fff92b6a3a6c33cc801310d00c3 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -45,11 +45,11 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } -TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements ) +TYPED_TEST( MatrixTest, getNonzeroElementsCount ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNumberOfNonzeroMatrixElements< MatrixType >(); + test_GetNonzeroElementsCount< MatrixType >(); 
} TYPED_TEST( MatrixTest, resetTest ) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index 4e28842ba066ea5f794d8a279dacda09fbad1a85..c316440d1719e67e0a3e612963df4e7e93caeb48 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -163,7 +163,7 @@ void test_SetLike() } template< typename Matrix > -void test_GetNumberOfNonzeroMatrixElements() +void test_GetNonzeroElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -204,7 +204,7 @@ void test_GetNumberOfNonzeroMatrixElements() { 10, 2, 28 }, { 10, 4, 29 }, { 10, 10, 30 } } ); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 ); + EXPECT_EQ( m.getNonzeroElementsCount(), 49 ); } template< typename Matrix > @@ -898,7 +898,7 @@ void test_RowsReduction() typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } ); auto rowLengths_view = rowLengths.getView(); rowLengths_view = 0; - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> IndexType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) mutable -> IndexType { if( value != 0.0 && row != column) TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 ); return ( value != 0.0 ); diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index bb613a17db750954c0310ade3beb830b6b745108..4c68db45a89334119494db0832a3d53e76dcd93e 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include @@ -20,11 +20,11 @@ #include #include -using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, 
TNL::Devices::Host, int >; -using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >; +using Tridiagonal_host_float = TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int >; +using Tridiagonal_host_int = TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, int >; -using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >; -using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >; +using Tridiagonal_cuda_float = TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int >; +using Tridiagonal_cuda_int = TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, int >; static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl"; @@ -36,14 +36,14 @@ static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl"; void test_GetSerializationType() { using namespace TNL::Containers::Segments; - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( 
TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( 
TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::TridiagonalMatrix< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > @@ -139,28 +139,6 @@ void test_GetCompressedRowLengths() EXPECT_EQ( rowLengths, correctRowLengths ); } -template< typename Matrix > -void test_GetRowLength() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - const IndexType rows = 8; - const IndexType cols = 7; - - Matrix m( rows, cols ); - - EXPECT_EQ( m.getRowLength( 0 ), 2 ); - EXPECT_EQ( m.getRowLength( 1 ), 3 ); - EXPECT_EQ( m.getRowLength( 2 ), 3 ); - EXPECT_EQ( m.getRowLength( 3 ), 3 ); - EXPECT_EQ( m.getRowLength( 4 ), 3 ); - EXPECT_EQ( m.getRowLength( 5 ), 3 ); - EXPECT_EQ( m.getRowLength( 6 ), 2 ); - EXPECT_EQ( m.getRowLength( 7 ), 1 ); -} - template< typename Matrix > void test_GetAllocatedElementsCount() { @@ -177,7 +155,7 @@ void test_GetAllocatedElementsCount() } template< typename Matrix > -void test_GetNumberOfNonzeroMatrixElements() +void test_GetNonzeroElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -206,7 +184,7 @@ void test_GetNumberOfNonzeroMatrixElements() m.setElement( 5, 5, 0); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); + EXPECT_EQ( m.getNonzeroElementsCount(), 15 ); } template< typename Matrix > @@ -579,14 +557,12 @@ void test_SetRow() auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { RealType values[ 3 ][ 3 ] { - { 1, 2, 0 }, + { 0, 1, 2 }, { 8, 9, 10 }, { 16, 17, 18 } }; auto row = matrix_view.getRow( rowIdx ); 
for( IndexType i = 0; i < 3; i++ ) { - if( rowIdx == 0 && i > 1 ) - break; row.setElement( i, values[ rowIdx ][ i ] ); } }; @@ -700,7 +676,7 @@ void test_AddRow() auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { RealType values[ 6 ][ 3 ] { - { 11, 11, 0 }, + { 0, 11, 11 }, { 22, 22, 22 }, { 33, 33, 33 }, { 44, 44, 44 }, @@ -1170,8 +1146,8 @@ void test_AssignmentOperator() using IndexType = typename Matrix::IndexType; constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization(); - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, organization >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, + using TridiagonalHost = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Host, IndexType, organization >; + using TridiagonalCuda = TNL::Matrices::TridiagonalMatrix< RealType, TNL::Devices::Cuda, IndexType, organization == TNL::Containers::Segments::RowMajorOrder ? 
TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); @@ -1347,31 +1323,31 @@ protected: // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types < - TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, short >, - TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, short >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, short >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >, - TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >, - TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, int >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >, - TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, long >, - TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, long >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, long >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long > + TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, short >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Host, short >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, short >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, short >, + TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, int >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Host, int >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, int >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, int >, + TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Host, long >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Host, long >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Host, long >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, short >, - TNL::Matrices::Tridiagonal< long, 
TNL::Devices::Cuda, short >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, short >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >, - TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >, - TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, int >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >, - TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, long >, - TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, long >, - TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, long >, - TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, short >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Cuda, short >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, short >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, short >, + TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, int >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Cuda, int >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, int >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, int >, + TNL::Matrices::TridiagonalMatrix< int, TNL::Devices::Cuda, long >, + TNL::Matrices::TridiagonalMatrix< long, TNL::Devices::Cuda, long >, + TNL::Matrices::TridiagonalMatrix< float, TNL::Devices::Cuda, long >, + TNL::Matrices::TridiagonalMatrix< double, TNL::Devices::Cuda, long > #endif >; @@ -1403,13 +1379,6 @@ TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) test_GetCompressedRowLengths< MatrixType >(); } -TYPED_TEST( MatrixTest, getRowLengthTest ) -{ - using MatrixType = typename TestFixture::MatrixType; - - test_GetRowLength< MatrixType >(); -} - TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1417,11 +1386,11 @@ TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) 
test_GetAllocatedElementsCount< MatrixType >(); } -TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) +TYPED_TEST( MatrixTest, getNonzeroElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNumberOfNonzeroMatrixElements< MatrixType >(); + test_GetNonzeroElementsCount< MatrixType >(); } TYPED_TEST( MatrixTest, resetTest ) @@ -1488,7 +1457,7 @@ TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; using RealAllocatorType = typename MatrixType::RealAllocatorType; - using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, + using MatrixType2 = TNL::Matrices::TridiagonalMatrix< RealType, DeviceType, IndexType, MatrixType::getOrganization() == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder, RealAllocatorType >;