From 65d9b74cec3690588bce4dd3786c8a84e81565bc Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Mon, 6 Jan 2020 14:54:04 +0100 Subject: [PATCH] I --- src/TNL/Containers/Segments/CSR.hpp | 3 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 6 +- src/TNL/Matrices/Dense.h | 6 + src/TNL/Matrices/Dense.hpp | 27 +++++ src/TNL/Matrices/SparseMatrix.h | 7 ++ src/TNL/Matrices/SparseMatrix.hpp | 107 +++++++++++++++++- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 85 +++++++++++++- 7 files changed, 234 insertions(+), 7 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 8b8ddfff51..971754b5a4 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -225,8 +225,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType j = begin; j < end && compute; j++ ) - reduction( aux, fetch( i, j, compute, args... ) ); + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 76790f3933..31f417df26 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -354,8 +354,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... 
) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -370,8 +371,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 90aa57170b..2e283c9e89 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -183,6 +183,12 @@ class Dense : public Matrix< Real, Device, Index > typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ = RealAllocator > + bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ = RealAllocator > + bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + void save( const String& fileName ) const; void load( const String& fileName ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index fe11d67593..ecd5aec1c9 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -994,6 +994,33 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Den throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename 
Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const +{ + return( this->getRows() == matrix.getRows() && + this->getColumns() == matrix.getColumns() && + this->getValues() == matrix.getValues() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const +{ + return ! ( *this == matrix ); +} template< typename Real, typename Device, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index c50f716123..75d9179282 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -175,6 +175,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > */ SparseMatrix& operator=( const SparseMatrix& matrix ); + /** + * \brief Assignment of dense matrix + */ + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > + SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ); + + /** * \brief Assignment of any other matrix type. 
* @param matrix diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 6c0655ce01..d38b9de34a 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -183,7 +183,7 @@ getCompressedRowLengths( Vector& rowLengths ) const rowLengths.setSize( this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { @@ -448,7 +448,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); @@ -615,7 +615,108 @@ operator=( const SparseMatrix& matrix ) return *this; } -// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, 
IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) +{ + using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + typename RHSMatrix::RowsCapacitiesType rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); + this->setCompressedRowLengths( rowLengths ); + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + columns_view = paddingIndex; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto this_segments_view = this->segments.getView(); + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) + { + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); + columns_view[ thisGlobalIdx ] = columnIndex; + values_view[ thisGlobalIdx ] = value; + } + }; + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, 
RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = paddingIndex; + matrixColumnsBuffer_view = paddingIndex; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; + } + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; + if( column != paddingIndex ) + { + columnIndex = column; + value = thisValuesBuffer_view[ bufferIdx ]; + } + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + //std::cerr << "This matrix = " << std::endl << *this << std::endl; + } + return *this; + +} + template< typename Real, typename Device, typename Index, diff --git 
a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index f00daf1f3b..6c4f8b2615 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -14,6 +14,7 @@ #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Matrices/MatrixType.h> +#include <TNL/Matrices/Dense.h> #include <TNL/Containers/Segments/CSR.h> #include <TNL/Containers/Segments/Ellpack.h> #include <TNL/Containers/Segments/SlicedEllpack.h> @@ -436,6 +437,55 @@ void testConversion() } } +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix; + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + 
EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + TEST( SparseMatrixCopyTest, CSR_HostToHost ) { testCopyAssignment< CSR_host, CSR_host >(); @@ -568,6 +618,39 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) } #endif -#endif +// Dense matrix assignment test +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) +{ + denseMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host ) +{ + denseMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host ) +{ + denseMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda ) +{ + denseMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda ) +{ + denseMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda ) +{ + denseMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST #include "../main.h" -- GitLab