From c2514bc6b8b76a13d73f947ae5d7a67d4d7204c5 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Fri, 17 Jan 2020 17:40:18 +0100
Subject: [PATCH] Debugging assignment of tridiagonal matrix to sparse matrix.

---
 src/TNL/Matrices/SparseMatrix.h               |   2 +-
 src/TNL/Matrices/SparseMatrix.hpp             |  59 +++++++---
 src/TNL/Matrices/TridiagonalMatrixRowView.hpp |   2 +-
 src/TNL/Matrices/TridiagonalMatrixView.h      |   2 +
 src/TNL/Matrices/TridiagonalMatrixView.hpp    |  17 +--
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 105 +++++++++++++++++-
 6 files changed, 155 insertions(+), 32 deletions(-)

diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 43ea25bf58..26d5d2d840 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const RealAllocatorType& realAllocator = RealAllocatorType(),
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );

-      ViewType getView();
+      ViewType getView() const; // TODO: remove const

       ConstViewType getConstView() const;

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 14495ad3d5..6d3d9d8b8b 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -82,13 +82,13 @@ template< typename Real,
           typename IndexAllocator >
 auto
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
-getView() -> ViewType
+getView() const -> ViewType
 {
    return ViewType( this->getRows(),
                     this->getColumns(),
-                    this->getValues().getView(),
-                    this->columnIndexes.getView(),
-                    this->segments.getView() );
+                    const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast
+                    const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
+                    const_cast< SparseMatrix* >( this )->segments.getView() );
 }

 template< typename Real,
@@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
       thisValuesBuffer_view = matrixValuesBuffer_view;

       ////
-      // Copy matrix elements from the buffer to the matrix
+      // Copy matrix elements from the buffer to the matrix and ignoring
+      // zero matrix elements.
       const IndexType matrix_columns = this->getColumns();
       auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
          RealType inValue( 0.0 );
@@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix )
    using RHSRealType = typename RHSMatrix::RealType;
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
-   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
+   using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;

-   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
    this->setCompressedRowLengths( rowLengths );

+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
+
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;

-   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
+   if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
+         RealType inValue( 0.0 );
+         IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
          if( columnIndex != paddingIndex )
          {
-            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
             columns_view[ thisGlobalIdx ] = columnIndex;
             values_view[ thisGlobalIdx ] = value;
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
          }
       };
       matrix.forAllRows( f );
    }
-   else*/
+   else
    {
       const IndexType maxRowLength = max( rowLengths );
       const IndexType bufferRowsCount( 128 );
@@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix )
          thisColumnsBuffer_view = matrixColumnsBuffer_view;

          ////
-         // Copy matrix elements from the buffer to the matrix
-         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
-            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
-            if( column != paddingIndex )
+         // Copy matrix elements from the buffer to the matrix and ignoring
+         // zero matrix elements
+         const IndexType matrix_columns = this->getColumns();
+         auto matrix_view = matrix.getView();
+         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
+            auto matrixRow = matrix_view.getRow( rowIdx );
+            while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
             {
-               columnIndex = column;
-               value = thisValuesBuffer_view[ bufferIdx ];
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = localIdx;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
+               value = inValue;
             }
          };
          this->forRows( baseRow, lastRow, f2 );
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
index ba60876b9b..80fc1a26d5 100644
--- a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -29,7 +29,7 @@ auto
 TridiagonalMatrixRowView< ValuesView, Indexer >::
 getSize() const -> IndexType
 {
-   return indexer.getRowSize();
+   return indexer.getRowSize( rowIdx );
 }

 template< typename ValuesView, typename Indexer >
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 7db517cbd3..61b005c5a7 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
       template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
       bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;

+      __cuda_callable__
       RowView getRow( const IndexType& rowIdx );

+      __cuda_callable__
       const RowView getRow( const IndexType& rowIdx ) const;

       void setValue( const RealType& v );
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index d8fa6061c3..7fc5fd6b7b 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    const auto indexer = this->indexer;
+   bool compute( true );
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
       if( rowIdx == 0 )
       {
-         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
-         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
       }
       else if( rowIdx + 1 < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
       }
       else if( rowIdx < indexer.getColumns() )
       {
-         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
-         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
+         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
      }
      else
-         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index e9898bb393..b285f5e058 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -15,6 +15,8 @@
 #include <TNL/Matrices/SparseMatrix.h>
 #include <TNL/Matrices/MatrixType.h>
 #include <TNL/Matrices/Dense.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/Multidiagonal.h>
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Containers/Segments/Ellpack.h>
 #include <TNL/Containers/Segments/SlicedEllpack.h>
@@ -370,6 +372,7 @@ void testCopyAssignment()

       Matrix2 triDiag2;
       triDiag2 = triDiag1;
+      checkTriDiagMatrix( triDiag1 );
       checkTriDiagMatrix( triDiag2 );
    }
    {
@@ -390,6 +393,7 @@ void testCopyAssignment()

       Matrix2 unevenRowSize2;
       unevenRowSize2 = unevenRowSize1;
+      checkUnevenRowSizeMatrix( unevenRowSize2 );
    }
 }

@@ -435,6 +439,62 @@ void testConversion()
    }
 }

+template< typename Matrix >
+void tridiagonalMatrixAssignment()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   TridiagonalHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ )
+         hostMatrix.setElement( i, j, i + j );
+
+   std::cerr << hostMatrix << std::endl;
+   Matrix matrix;
+   matrix = hostMatrix;
+   std::cerr << matrix << std::endl;
+   using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
+   RowCapacitiesType rowCapacities;
+   matrix.getCompressedRowLengths( rowCapacities );
+   RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 };
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   TridiagonalCuda cudaMatrix( rows, columns );
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ )
+         cudaMatrix.setElement( i, j, i + j );*/
+
+   matrix = cudaMatrix;
+   matrix.getCompressedRowLengths( rowCapacities );
+   EXPECT_EQ( rowCapacities, exactRowLengths );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( abs( i - j ) > 1 )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
+
+}
+
 template< typename Matrix >
 void denseMatrixAssignment()
 {
@@ -469,10 +529,10 @@ void denseMatrixAssignment()

 #ifdef HAVE_CUDA
    DenseCuda cudaMatrix( rows, columns );
-   //cudaMatrix = hostMatrix;
-   for( IndexType i = 0; i < columns; i++ )
+   cudaMatrix = hostMatrix;
+   /*for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+         cudaMatrix.setElement( i, j, i + j );*/

    matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
@@ -487,7 +547,7 @@
       }
 #endif
 }
-
+/*
 TEST( SparseMatrixCopyTest, CSR_HostToHost )
 {
    testCopyAssignment< CSR_host, CSR_host >();
@@ -619,6 +679,43 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda )
    testConversion< SE_cuda, E_cuda >();
 }
 #endif
+*/
+
+////
+// Tridiagonal matrix assignment test
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host )
+{
+   tridiagonalMatrixAssignment< CSR_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host )
+{
+   tridiagonalMatrixAssignment< E_host >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host )
+{
+   tridiagonalMatrixAssignment< SE_host >();
+}
+
+#ifdef HAVE_CUDA
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda )
+{
+   tridiagonalMatrixAssignment< CSR_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda )
+{
+   tridiagonalMatrixAssignment< E_cuda >();
+}
+
+TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda )
+{
+   tridiagonalMatrixAssignment< SE_cuda >();
+}
+#endif // HAVE_CUDA
+
+
 // Dense matrix assignment test
 TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host )
-- 
GitLab
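
For context, a minimal host-only sketch of the conversion that the new TridiagonalMatrixAssignment tests exercise: filling a TNL::Matrices::Tridiagonal matrix and assigning it to a TNL::Matrices::SparseMatrix. It is distilled from tridiagonalMatrixAssignment() above and is an illustration only; it assumes that the remaining SparseMatrix template parameters (matrix type and segments) can be left at their defaults.

#include <iostream>
#include <algorithm>
#include <TNL/Devices/Host.h>
#include <TNL/Matrices/Tridiagonal.h>
#include <TNL/Matrices/SparseMatrix.h>

int main()
{
   using TridiagonalHost = TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >;
   // Assumes the default MatrixType and Segments template parameters of SparseMatrix.
   using SparseHost = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;

   // Build a 10x10 tridiagonal matrix with a_ij = i + j on its three diagonals,
   // the same setup the unit test uses.
   const int rows = 10, columns = 10;
   TridiagonalHost hostMatrix( rows, columns );
   for( int i = 0; i < rows; i++ )
      for( int j = std::max( 0, i - 1 ); j < std::min( columns, i + 2 ); j++ )
         hostMatrix.setElement( i, j, i + j );

   // The assignment operator being debugged converts the tridiagonal storage to
   // sparse storage and skips zero elements (here the single entry a_00 = 0).
   SparseHost sparseMatrix;
   sparseMatrix = hostMatrix;

   std::cout << sparseMatrix << std::endl;
   return 0;
}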