From ad33eecb09bf3f85f18eeb921e8a2075203d3ec1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Mon, 6 Jan 2020 21:20:22 +0100
Subject: [PATCH] Implemented dense to sparse matrix assignment.

---
 src/TNL/Containers/Segments/CSR.hpp           |  2 +-
 src/TNL/Containers/Segments/SlicedEllpack.hpp |  4 +-
 src/TNL/Matrices/Dense.h                      | 16 +----
 src/TNL/Matrices/Dense.hpp                    | 40 ++++--------
 src/TNL/Matrices/SparseMatrix.h               |  1 +
 src/TNL/Matrices/SparseMatrix.hpp             | 62 ++++++++++---------
 src/TNL/Matrices/SparseMatrixView.hpp         |  2 +-
 src/UnitTests/Matrices/SparseMatrixCopyTest.h | 18 +++---
 8 files changed, 66 insertions(+), 79 deletions(-)

diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp
index 971754b5a4..3581748fad 100644
--- a/src/TNL/Containers/Segments/CSR.hpp
+++ b/src/TNL/Containers/Segments/CSR.hpp
@@ -218,7 +218,7 @@ void
 CSR< Device, Index, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto offsetsView = this->offsets.getConstView();
    auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable {
       const IndexType begin = offsetsView[ i ];
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp
index 31f417df26..62e2ca7d58 100644
--- a/src/TNL/Containers/Segments/SlicedEllpack.hpp
+++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp
@@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes )
    const auto sizes_view = sizes.getConstView();
    auto slices_view = this->sliceOffsets.getView();
    auto slice_segment_size_view = this->sliceSegmentSizes.getView();
-   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType {
+   auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType {
       if( globalIdx < _size )
          return sizes_view[ globalIdx ];
       return 0;
@@ -341,7 +341,7 @@ void
 SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) );
+   using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) );
    const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView();
    const auto sliceOffsets_view = this->sliceOffsets.getConstView();
    if( RowMajorOrder )
diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 2e283c9e89..757fa4eae4 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -30,15 +30,6 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Dense : public Matrix< Real, Device, Index >
 {
-   private:
-      // convenient template alias for controlling the selection of copy-assignment operator
-      template< typename Device2 >
-      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-      // friend class will be needed for templated assignment operators
-      //template< typename Real2, typename Device2, typename Index2 >
-      //friend class Dense;
-
    public:
       using RealType = Real;
       using DeviceType = Device;
@@ -176,12 +167,11 @@ class Dense : public Matrix< Real, Device, Index >
                                 const RealType& omega = 1.0 ) const;
 
       // copy assignment
-      Dense& operator=( const Dense& matrix );
+      //Dense& operator=( const Dense& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
+      Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix );
 
       template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
      bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index ecd5aec1c9..7517c6b0ea 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -373,7 +373,7 @@ forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
@@ -959,39 +959,25 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
-
-// copy assignment
 template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix )
-{
-   this->setLike( matrix );
-   this->values = matrix.values;
-   return *this;
-}
-
-// cross-device copy assignment
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix )
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
-   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
-                  "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
-                  "unknown device" );
-
-   this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet.");
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      this->setLike( matrix );
+      this->values = matrix.getValues();
+   }
+   else
+   {
+
+   }
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 75d9179282..44883a124d 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -16,6 +16,7 @@
 #include <TNL/Containers/Segments/CSR.h>
 #include <TNL/Matrices/SparseMatrixRowView.h>
 #include <TNL/Matrices/SparseMatrixView.h>
+#include <TNL/Matrices/Dense.h>
 
 namespace TNL {
 namespace Matrices {
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index d38b9de34a..6aa75995f5 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -448,10 +448,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
    const auto columns_view = this->columnIndexes.getConstView();
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
-   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+   auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) {
      IndexType columnIdx = columns_view[ globalIdx ];
      if( columnIdx != paddingIndex_ )
-        return fetch( rowIdx, columnIdx, values_view[ globalIdx ] );
+        return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
      return zero;
    };
    this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );
@@ -594,7 +594,7 @@ performSORIteration( const Vector1& b,
                      Vector2& x,
                      const RealType& omega ) const
 {
-
+   return false;
 }
 
 // copy assignment
@@ -624,7 +624,8 @@ template< typename Real,
           typename IndexAllocator >
    template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ >
 SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >&
-SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
+SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
+operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix )
 {
    using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >;
    using RHSIndexType = typename RHSMatrix::IndexType;
@@ -632,27 +633,29 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
    using RHSDeviceType = typename RHSMatrix::DeviceType;
    using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
 
-   typename RHSMatrix::RowsCapacitiesType rowLengths;
+   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
    matrix.getCompressedRowLengths( rowLengths );
-   this->setDimensions( matrix.getRows(), matrix.getColumns() );
+   this->setLike( matrix );
    this->setCompressedRowLengths( rowLengths );
+   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
+   rowLocalIndexes = 0;
 
    // TODO: use getConstView when it works
    const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
    const IndexType paddingIndex = this->getPaddingIndex();
    auto columns_view = this->columnIndexes.getView();
    auto values_view = this->values.getView();
+   auto rowLocalIndexes_view = rowLocalIndexes.getView();
    columns_view = paddingIndex;
 
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
-      const auto this_segments_view = this->segments.getView();
       const auto segments_view = this->segments.getView();
-      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
-         if( columnIndex != paddingIndex )
+      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value ) mutable {
+         if( value != 0.0 )
          {
-            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
-            columns_view[ thisGlobalIdx ] = columnIndex;
+            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ );
+            columns_view[ thisGlobalIdx ] = columnIdx;
             values_view[ thisGlobalIdx ] = value;
          }
       };
@@ -660,15 +663,13 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
    }
    else
    {
-      const IndexType maxRowLength = max( rowLengths );
+      const IndexType maxRowLength = matrix.getColumns();
       const IndexType bufferRowsCount( 128 );
       const size_t bufferSize = bufferRowsCount * maxRowLength;
       Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
-      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize );
       Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
       Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize );
       auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
-      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
       auto thisValuesBuffer_view = thisValuesBuffer.getView();
       auto thisColumnsBuffer_view = thisColumnsBuffer.getView();
 
@@ -678,34 +679,40 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea
       {
         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
         thisColumnsBuffer = paddingIndex;
-         matrixColumnsBuffer_view = paddingIndex;
 
         ////
         // Copy matrix elements into buffer
         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
-            if( columnIndex != paddingIndex )
-            {
-               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
-               matrixValuesBuffer_view[ bufferIdx ] = value;
-            }
+            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
+            matrixValuesBuffer_view[ bufferIdx ] = value;
         };
         matrix.forRows( baseRow, lastRow, f1 );
 
         ////
         // Copy the source matrix buffer to this matrix buffer
         thisValuesBuffer_view = matrixValuesBuffer_view;
-         thisColumnsBuffer_view = matrixColumnsBuffer_view;
 
         ////
         // Copy matrix elements from the buffer to the matrix
+         const IndexType matrix_columns = this->getColumns();
         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable {
-            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
-            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
-            if( column != paddingIndex )
+            RealType inValue( 0.0 );
+            IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] );
+            while( inValue == 0.0 && column < matrix_columns )
             {
-               columnIndex = column;
-               value = thisValuesBuffer_view[ bufferIdx ];
+               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + column++;
+               inValue = thisValuesBuffer_view[ bufferIdx ];
+            }
+            rowLocalIndexes_view[ rowIdx ] = column;
+            if( inValue == 0.0 )
+            {
+               columnIndex = paddingIndex;
+               value = 0.0;
+            }
+            else
+            {
+               columnIndex = column - 1;
+               value = inValue;
             }
         };
         this->forRows( baseRow, lastRow, f2 );
@@ -749,7 +756,6 @@ operator=( const RHSMatrix& matrix )
 
    if( std::is_same< DeviceType, RHSDeviceType >::value )
    {
-      const auto this_segments_view = this->segments.getView();
       const auto segments_view = this->segments.getView();
       auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable {
         if( columnIndex != paddingIndex )
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index df136388eb..d836fe5e97 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -494,7 +494,7 @@ performSORIteration( const Vector1& b,
                      Vector2& x,
                      const RealType& omega ) const
 {
-
+   return false;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index 6c4f8b2615..8677443b29 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -443,22 +443,22 @@ void denseMatrixAssignment()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
-   
+
    using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
    using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
-   
+
    const IndexType rows( 10 ), columns( 10 );
    DenseHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
      for( IndexType j = 0; j <= i; j++ )
        hostMatrix( i, j ) = i + j;
-   
+
    Matrix matrix;
    matrix = hostMatrix;
    using RowCapacitiesType = typename Matrix::RowsCapacitiesType;
    RowCapacitiesType rowCapacities;
    matrix.getCompressedRowLengths( rowCapacities );
-   RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+   RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    EXPECT_EQ( rowCapacities, exactRowLengths );
    for( IndexType i = 0; i < columns; i++ )
      for( IndexType j = 0; j < rows; j++ )
@@ -468,10 +468,14 @@ void denseMatrixAssignment()
      else
        EXPECT_EQ( matrix.getElement( i, j ), i + j );
    }
-   
+
 #ifdef HAVE_CUDA
-   DenseCuda cudaMatrix;
-   cudaMatrix = hostMatrix;
+   DenseCuda cudaMatrix( rows, columns );
+   //cudaMatrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+   matrix = cudaMatrix;
    matrix.getCompressedRowLengths( rowCapacities );
    EXPECT_EQ( rowCapacities, exactRowLengths );
 
-- 
GitLab
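
For reference, a minimal usage sketch of the dense-to-sparse assignment implemented above, mirroring the denseMatrixAssignment test in this patch. The include paths, the SparseMatrix default template parameters, and the standalone main() are assumptions for illustration; only the lower-triangular fill, the assignment itself, and the expected row capacities come from the patch.

// Sketch only: includes and SparseMatrix template defaults are assumed, not taken from the patch.
#include <TNL/Matrices/Dense.h>
#include <TNL/Matrices/SparseMatrix.h>

int main()
{
   using DenseMatrix = TNL::Matrices::Dense< double, TNL::Devices::Host, int >;
   using SparseMatrix = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;

   // Lower-triangular dense matrix, filled as in the denseMatrixAssignment test.
   const int rows = 10, columns = 10;
   DenseMatrix dense( rows, columns );
   for( int i = 0; i < rows; i++ )
      for( int j = 0; j <= i; j++ )
         dense( i, j ) = i + j;

   // The templated operator= added by this patch converts the dense matrix into
   // a sparse one; only non-zero elements are stored, which is why the test now
   // expects row 0 to have capacity 0 (its single element 0+0 is zero).
   SparseMatrix sparse;
   sparse = dense;

   return 0;
}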