From cbf4c7a3ab01d1cab9ff0f7ae0b6df0de3edfb30 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 29 Nov 2019 16:28:38 +0100 Subject: [PATCH 001/179] Added Segments. --- src/TNL/Containers/Segments.h | 29 ++++++++ src/TNL/Containers/Segments/CSR.h | 83 +++++++++++++++++++++ src/TNL/Containers/Segments/CSR.hpp | 110 ++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 src/TNL/Containers/Segments.h create mode 100644 src/TNL/Containers/Segments/CSR.h create mode 100644 src/TNL/Containers/Segments/CSR.hpp diff --git a/src/TNL/Containers/Segments.h b/src/TNL/Containers/Segments.h new file mode 100644 index 000000000..99ea22357 --- /dev/null +++ b/src/TNL/Containers/Segments.h @@ -0,0 +1,29 @@ +/*************************************************************************** + Segments.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Containers { + +template< typename Value, + typename Organization > +class Segments +{ + public: + + using ValueType = Value; + using OrganizationType = Organization; + using IndexType = typename Organization::IndexType; + +}; + +} // namespace Conatiners +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h new file mode 100644 index 000000000..3aa53e76c --- /dev/null +++ b/src/TNL/Containers/Segments/CSR.h @@ -0,0 +1,83 @@ +/*************************************************************************** + CSR.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + 
+#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index > +class Segments +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + + CSR(); + + CSR( const SizesHolder& sizes ); + + CSR( const CSR& csr ); + + CSR( const CSR&& csr ); + + /** + * \brief Set number of segments + */ + //void setSegmentsCount(); + + /** + * \brief Set sizes of particular segmenets. + */ + template< typename SizesHolder = OffsetsHolder > + void setSizes( const SizesHolder& sizes ) + + /** + * \brief Number segments. + */ + Index getSize() const; + + Index getStorageSize() const; + + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args' + */ + template< typename Function, typename... Args > + void forAll( Function& f, Args args ) const; + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename... 
Args > + void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args args ); + + protected: + + OffsetsHolder offsets; + +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include \ No newline at end of file diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp new file mode 100644 index 000000000..ea45b40ba --- /dev/null +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -0,0 +1,110 @@ +/*************************************************************************** + CSR.hpp - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index > +CSR< Device, Index >:: +CSR() +{ +} + +template< typename Device, + typename Index > +CSR< Device, Index >:: +CSR( const CSR& csr ) : offsets( csr.offsets ) +{ +} + +template< typename Device, + typename Index > +CSR< Device, Index >:: +CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) +{ + +} + +template< typename Device, + typename Index > +CSR< Device, Index >:: +void setSegmentsCount( const IndexType& size ) +{ + this->offsets.setSize( size + 1 ); +} + +template< typename Device, + typename Index > + template< typename SizesHolder = OffsetsHolder > +CSR< Device, Index >:: +void setSizes( const SizesHolder& sizes ) +{ + this->offsets.setSize( sizes.getSize() + 1 ); + auto view = this->offsets.getView( 0, sizes.getSize() ); + view = sizes; + this->offsets.setElement( sizes.getSize>(), 0 ); + this->offsets.scan< Algorithms::ScanType::Exclusive >(); +} + +template< typename Device, + typename Index > +CSR< Device, Index >:: +Index getSize() const +{ + return 
this->offsets.getSize() - 1; +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +CSR< Device, Index >:: +void forAll( Function& f, Args args ) const +{ + const auto offsetsView = this->offsets.getView(); + auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + for( IndexType j = begin; j < end; j++ ) + f( i, j, args ); + }; + Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +CSR< Device, Index >:: +void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args ) +{ + const auto offsetsView = this->offsets.getView(); + auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + Real aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); +} + + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL \ No newline at end of file -- GitLab From 4fb3337049762632a0d0ccf35e0483f0c4413fa4 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 29 Nov 2019 16:29:11 +0100 Subject: [PATCH 002/179] Added SparseMatrix to be implemented using the Segments. 
--- src/TNL/Matrices/SparseMatrix.h | 163 ++++++++++++++++++++++ src/TNL/Matrices/SparseMatrix.hpp | 221 ++++++++++++++++++++++++++++++ 2 files changed, 384 insertions(+) create mode 100644 src/TNL/Matrices/SparseMatrix.h create mode 100644 src/TNL/Matrices/SparseMatrix.hpp diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h new file mode 100644 index 000000000..acca39bf4 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrix.h @@ -0,0 +1,163 @@ +/*************************************************************************** + SparseMatrix.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Organization > +class SparseMatrix : public Matrix< Real, typename Organization::Device, typename Organization::Index > +{ + public: + + using RealType = Real; + using OrganizationType = Organization; + using DeviceType = typename Organization::DeviceType; + using IndexType = typename Organization::IndexType; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + void setDimensions( const IndexType rows, + const IndexType columns ); + + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + + IndexType getRowLength( const IndexType row ) const; + + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + + IndexType getNonZeroRowLength( const IndexType row ) const; + + __cuda_callable__ + IndexType getNonZeroRowLengthFast( const IndexType row ) const; + + template< typename Real2, typename Device2, typename Index2 > + void setLike( const CSR< Real2, Device2, Index2 >& matrix ); + + void reset(); + + __cuda_callable__ + bool setElementFast( const IndexType row, 
+ const IndexType column, + const RealType& value ); + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + __cuda_callable__ + bool addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + + __cuda_callable__ + bool setRowFast( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + + bool setRow( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + + + __cuda_callable__ + bool addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + + __cuda_callable__ + RealType getElementFast( const IndexType row, + const IndexType column ) const; + + RealType getElement( const IndexType row, + const IndexType column ) const; + + __cuda_callable__ + void getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const; + + __cuda_callable__ + MatrixRow getRow( const IndexType rowIndex ); + + __cuda_callable__ + ConstMatrixRow getRow( const IndexType rowIndex ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + // TODO: add const RealType& multiplicator = 1.0 ) + + template< typename Real2, typename Index2 > + void addMatrix( const CSR< 
Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const CSR< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + // copy assignment + CSR& operator=( const CSR& matrix ); + + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix ); + + void save( File& file ) const; + + void load( File& file ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void print( std::ostream& str ) const; + + +}; + +} // namespace Conatiners +} // namespace TNL \ No newline at end of file diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp new file mode 100644 index 000000000..2d11bb21e --- /dev/null +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -0,0 +1,221 @@ +/*************************************************************************** + SparseMatrix.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Organization > +static String getSerializationType(); + +template< typename Real, + typename Organization > +String getSerializationTypeVirtual() const; + +template< typename Real, + typename Organization > +void setDimensions( const IndexType rows, + const IndexType columns ); + +template< typename Real, + typename 
Organization > +void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + +template< typename Real, + typename Organization > +IndexType getRowLength( const IndexType row ) const; + +template< typename Real, + typename Organization > +__cuda_callable__ +IndexType getRowLengthFast( const IndexType row ) const; + +template< typename Real, + typename Organization > +IndexType getNonZeroRowLength( const IndexType row ) const; + +template< typename Real, + typename Organization > +__cuda_callable__ +IndexType getNonZeroRowLengthFast( const IndexType row ) const; + +template< typename Real, + typename Organization > +template< typename Real2, typename Device2, typename Index2 > +void setLike( const CSR< Real2, Device2, Index2 >& matrix ); + +template< typename Real, + typename Organization > +void reset(); + +template< typename Real, + typename Organization > +__cuda_callable__ +bool setElementFast( const IndexType row, + const IndexType column, + const RealType& value ); + +template< typename Real, + typename Organization > +bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + +template< typename Real, + typename Organization > +__cuda_callable__ +bool addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + +template< typename Real, + typename Organization > +bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + +template< typename Real, + typename Organization > +__cuda_callable__ +bool setRowFast( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + +template< typename Real, + typename Organization > +bool setRow( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + + +template< typename Real, + typename 
Organization > +__cuda_callable__ +bool addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + +template< typename Real, + typename Organization > +bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + +template< typename Real, + typename Organization > +__cuda_callable__ +RealType getElementFast( const IndexType row, + const IndexType column ) const; + +template< typename Real, + typename Organization > +RealType getElement( const IndexType row, + const IndexType column ) const; + +__cuda_callable__ +template< typename Real, + typename Organization > +void getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const; + +template< typename Real, + typename Organization > +__cuda_callable__ +MatrixRow getRow( const IndexType rowIndex ); + +template< typename Real, + typename Organization > +__cuda_callable__ +ConstMatrixRow getRow( const IndexType rowIndex ) const; + +template< typename Real, + typename Organization > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + +template< typename Real, + typename Organization > +template< typename InVector, + typename OutVector > +void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; +// TODO: add const RealType& multiplicator = 1.0 ) + +template< typename Real, + typename Organization > +template< typename Real2, typename Index2 > +void addMatrix( const CSR< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + +template< typename Real, + typename Organization > +template< typename Real2, typename Index2 > +void getTransposition( const CSR< Real2, Device, Index2 >& 
matrix, + const RealType& matrixMultiplicator = 1.0 ); + +template< typename Real, + typename Organization > +template< typename Vector1, typename Vector2 > +bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + +// copy assignment +template< typename Real, + typename Organization > +CSR& operator=( const CSR& matrix ); + +// cross-device copy assignment +template< typename Real, + typename Organization > +template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > +CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix ); + +template< typename Real, + typename Organization > +void save( File& file ) const; + +template< typename Real, + typename Organization > +void load( File& file ); + +template< typename Real, + typename Organization > +void save( const String& fileName ) const; + +template< typename Real, + typename Organization > +void load( const String& fileName ); + +template< typename Real, + typename Organization > +void print( std::ostream& str ) const; + + + } //namespace Matrices +} // namespace TNL -- GitLab From d849e7ac783b900ca6623df183a2bdefde3bacea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 2 Dec 2019 17:57:39 +0100 Subject: [PATCH 003/179] Added allocator to Matrix and moved getNumberOfMatrixElements from Sparse to Matrix. 
--- src/TNL/Matrices/Matrix.h | 23 +++--- src/TNL/Matrices/Matrix_impl.h | 127 ++++++++++++++++++++++----------- src/TNL/Matrices/Sparse.h | 2 - src/TNL/Matrices/Sparse_impl.h | 7 -- 4 files changed, 100 insertions(+), 59 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index eb29f62c7..a877fd5c2 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -11,6 +11,7 @@ #pragma once #include +#include #include #include #include @@ -23,22 +24,28 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, - typename Index = int > + typename Index = int, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Matrix : public Object { public: - typedef Real RealType; + using RealType = Real; typedef Device DeviceType; typedef Index IndexType; typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - typedef Containers::Vector< RealType, DeviceType, IndexType > ValuesVector; + typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector; + using RealAllocatorType = RealAllocator; - Matrix(); + Matrix( const RealAllocatorType& allocator = RealAllocatorType() ); + + Matrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& allocator = RealAllocatorType() ); virtual void setDimensions( const IndexType rows, - const IndexType columns ); + const IndexType columns ); virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0; @@ -50,10 +57,10 @@ public: virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Matrix< Real2, 
Device2, Index2 >& matrix ); + template< typename Real2, typename Device2, typename Index2, typename RealAllocator2 > + void setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix ); - virtual IndexType getNumberOfMatrixElements() const = 0; + IndexType getNumberOfMatrixElements() const; virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 3371ee4ec..599e5ad33 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -21,17 +21,33 @@ namespace Matrices { template< typename Real, typename Device, - typename Index > -Matrix< Real, Device, Index >::Matrix() + typename Index, + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: +Matrix( const RealAllocatorType& allocator ) : rows( 0 ), - columns( 0 ) + columns( 0 ), + values( allocator ) { } template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::setDimensions( const IndexType rows, + typename Index, + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: +Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator ) +: rows( rows_ ), + columns( columns_ ), + values( allocator ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows, const IndexType columns ) { TNL_ASSERT( rows > 0 && columns > 0, @@ -42,8 +58,9 @@ void Matrix< Real, Device, Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const { rowLengths.setSize( 
this->getRows() ); getCompressedRowLengths( rowLengths.getView() ); @@ -51,8 +68,9 @@ void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLength template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); for( IndexType row = 0; row < this->getRows(); row++ ) @@ -61,19 +79,31 @@ void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLength template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > template< typename Real2, typename Device2, - typename Index2 > -void Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix ) + typename Index2, + typename RealAllocator2 > +void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix ) { setDimensions( matrix.getRows(), matrix.getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const + typename Index, + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfMatrixElements() const +{ + return this->values.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const { IndexType nonZeroElements( 0 ); for( IndexType i = 0; this->values.getSize(); i++ ) @@ -85,27 +115,30 @@ Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const template< typename Real, typename Device, - typename 
Index > + typename Index, + typename RealAllocator > __cuda_callable__ -Index Matrix< Real, Device, Index >::getRows() const +Index Matrix< Real, Device, Index, RealAllocator >::getRows() const { return this->rows; } template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > __cuda_callable__ -Index Matrix< Real, Device, Index >::getColumns() const +Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const { return this->columns; } template< typename Real, typename Device, - typename Index > -const typename Matrix< Real, Device, Index >::ValuesVector& -Matrix< Real, Device, Index >:: + typename Index, + typename RealAllocator > +const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& +Matrix< Real, Device, Index, RealAllocator >:: getValues() const { return this->values; @@ -113,9 +146,10 @@ getValues() const template< typename Real, typename Device, - typename Index > -typename Matrix< Real, Device, Index >::ValuesVector& -Matrix< Real, Device, Index >:: + typename Index, + typename RealAllocator > +typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& +Matrix< Real, Device, Index, RealAllocator >:: getValues() { return this->values; @@ -123,8 +157,9 @@ getValues() template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::reset() + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; @@ -132,9 +167,10 @@ void Matrix< Real, Device, Index >::reset() template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > template< typename MatrixT > -bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const { if( this->getRows() != matrix.getRows() || this->getColumns() != matrix.getColumns() ) @@ 
-148,17 +184,19 @@ bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > template< typename MatrixT > -bool Matrix< Real, Device, Index >::operator != ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const { return ! operator == ( matrix ); } template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::save( File& file ) const + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const { Object::save( file ); file.save( &this->rows ); @@ -168,8 +206,9 @@ void Matrix< Real, Device, Index >::save( File& file ) const template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::load( File& file ) + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::load( File& file ) { Object::load( file ); file.load( &this->rows ); @@ -179,17 +218,19 @@ void Matrix< Real, Device, Index >::load( File& file ) template< typename Real, typename Device, - typename Index > -void Matrix< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const { } template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > __cuda_callable__ const Index& -Matrix< Real, Device, Index >:: +Matrix< Real, Device, Index, RealAllocator >:: getNumberOfColors() const { return this->numberOfColors; @@ -197,9 +238,10 @@ getNumberOfColors() const template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > void -Matrix< Real, Device, Index >:: +Matrix< Real, Device, Index, RealAllocator >:: 
computeColorsVector(Containers::Vector &colorsVector) { for( IndexType i = this->getRows() - 1; i >= 0; i-- ) @@ -234,9 +276,10 @@ computeColorsVector(Containers::Vector &colorsVector) template< typename Real, typename Device, - typename Index > + typename Index, + typename RealAllocator > void -Matrix< Real, Device, Index >:: +Matrix< Real, Device, Index, RealAllocator >:: copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix ) { this->numberOfColors = matrix.getNumberOfColors(); diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Sparse.h index 7dc3798d2..c19002443 100644 --- a/src/TNL/Matrices/Sparse.h +++ b/src/TNL/Matrices/Sparse.h @@ -37,8 +37,6 @@ class Sparse : public Matrix< Real, Device, Index > template< typename Real2, typename Device2, typename Index2 > void setLike( const Sparse< Real2, Device2, Index2 >& matrix ); - IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; IndexType getMaxRowLength() const; diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h index d1643db19..dda95e68b 100644 --- a/src/TNL/Matrices/Sparse_impl.h +++ b/src/TNL/Matrices/Sparse_impl.h @@ -36,13 +36,6 @@ void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index this->allocateMatrixElements( matrix.getNumberOfMatrixElements() ); } -template< typename Real, - typename Device, - typename Index > -Index Sparse< Real, Device, Index >::getNumberOfMatrixElements() const -{ - return this->values.getSize(); -} template< typename Real, typename Device, -- GitLab From 6f645c5a2afae4711f71a243cf9dc251d0be66bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 2 Dec 2019 17:58:48 +0100 Subject: [PATCH 004/179] Added unit tests for CSR matrix using Segments. 
--- src/UnitTests/Matrices/CMakeLists.txt | 16 ++ .../SparseMatrixTest_CSR_segments.cpp | 1 + .../Matrices/SparseMatrixTest_CSR_segments.cu | 1 + .../Matrices/SparseMatrixTest_CSR_segments.h | 141 ++++++++++++++++++ 4 files changed, 159 insertions(+) create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 2a08be219..f278934a6 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -25,6 +25,12 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + #### + # Segments tests + CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -61,6 +67,13 @@ ELSE( BUILD_CUDA ) ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + #### + # Segments tests + ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) @@ -76,6 +89,9 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT # TODO: DenseMatrixTest is not finished #ADD_TEST( DenseMatrixTest 
${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +#### +# Segments tests +ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp new file mode 100644 index 000000000..771c74b9a --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_CSR_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu new file mode 100644 index 000000000..771c74b9a --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_CSR_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h new file mode 100644 index 000000000..00654de3c --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -0,0 +1,141 @@ +/*************************************************************************** + SparseMatrixTest_CSR.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "SparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class CSRMatrixTest : public ::testing::Test +{ +protected: + using CSRMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using CSRMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short 
>, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); + +TYPED_TEST( CSRMatrixTest, setDimensionsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetDimensions< CSRMatrixType >(); +} + +//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using CSRMatrixType = typename TestFixture::CSRMatrixType; +// +//// test_SetCompressedRowLengths< CSRMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( CSRMatrixTest, setLikeTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetLike< CSRMatrixType, CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, resetTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Reset< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetElement< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, addElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_AddElement< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetRow< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, vectorProductTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_VectorProduct< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" ); +} + +TYPED_TEST( CSRMatrixTest, printTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Print< CSRMatrixType >(); +} + +#endif + +#include "../main.h" -- GitLab From 3b986213231d04c1f16a2955b37545266a8073a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 2 Dec 2019 17:59:26 +0100 Subject: [PATCH 005/179] Implementing SparseMatrix using Segments. 
--- src/TNL/Containers/Segments/CSR.h | 25 +- src/TNL/Matrices/SparseMatrix.h | 74 ++- src/TNL/Matrices/SparseMatrix.hpp | 744 +++++++++++++++++++++++------- 3 files changed, 632 insertions(+), 211 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 3aa53e76c..f86def78e 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -19,7 +19,7 @@ namespace TNL { template< typename Device, typename Index > -class Segments +class CSR { public: @@ -29,29 +29,24 @@ class Segments CSR(); - CSR( const SizesHolder& sizes ); + CSR( const Vector< IndexType, DeviceType, IndexType >& sizes ); - CSR( const CSR& csr ); + CSR( const CSR& segments ); - CSR( const CSR&& csr ); - - /** - * \brief Set number of segments - */ - //void setSegmentsCount(); + CSR( const CSR&& segments ); /** * \brief Set sizes of particular segmenets. */ template< typename SizesHolder = OffsetsHolder > - void setSizes( const SizesHolder& sizes ) + void setSizes( const SizesHolder& sizes ); /** * \brief Number segments. */ - Index getSize() const; + IndexType getSize() const; - Index getStorageSize() const; + IndexType getStorageSize() const; IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; @@ -62,13 +57,13 @@ class Segments * function 'f' with arguments 'args' */ template< typename Function, typename... Args > - void forAll( Function& f, Args args ) const; + void forAll( Function& f, Args... args ) const; /*** * \brief Go over all segments and perform a reduction in each of them. */ template< typename Fetch, typename Reduction, typename ResultKeeper, typename... Args > - void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args args ); + void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... 
args ); protected: @@ -80,4 +75,4 @@ class Segments } // namespace Conatiners } // namespace TNL -#include \ No newline at end of file +#include diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index acca39bf4..e266dc66d 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -10,19 +10,46 @@ #pragma once +#include +#include + namespace TNL { namespace Matrices { template< typename Real, - typename Organization > -class SparseMatrix : public Matrix< Real, typename Organization::Device, typename Organization::Index > + template< typename, typename > class Segments, + typename Device = Devices::Host, + typename Index = int, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > { public: using RealType = Real; - using OrganizationType = Organization; - using DeviceType = typename Organization::DeviceType; - using IndexType = typename Organization::IndexType; + template< typename Device_, typename Index_ > + using SegmentsTemplate = Segments< Device_, Index_ >; + using SegmentsType = Segments< Device, Index >; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using IndexAllocatorType = IndexAllocator; + using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; + using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + + SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + 
SparseMatrix( const SparseMatrix& m ); + + SparseMatrix( const SparseMatrix&& m ); + + SparseMatrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); static String getSerializationType(); @@ -43,8 +70,10 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; - template< typename Real2, typename Device2, typename Index2 > - void setLike( const CSR< Real2, Device2, Index2 >& matrix ); + template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); + + IndexType getNumberOfNonzeroMatrixElements() const; void reset(); @@ -106,11 +135,11 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam IndexType* columns, RealType* values ) const; - __cuda_callable__ + /*__cuda_callable__ MatrixRow getRow( const IndexType rowIndex ); __cuda_callable__ - ConstMatrixRow getRow( const IndexType rowIndex ) const; + ConstMatrixRow getRow( const IndexType rowIndex ) const;*/ template< typename Vector > __cuda_callable__ @@ -123,14 +152,15 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam OutVector& outVector ) const; // TODO: add const RealType& multiplicator = 1.0 ) - template< typename Real2, typename Index2 > - void addMatrix( const CSR< Real2, Device, Index2 >& matrix, + /*template< typename Real2, typename Index2 > + void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); template< typename Real2, typename Index2 > - void getTransposition( const CSR< Real2, 
Device, Index2 >& matrix, + void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, const RealType& matrixMultiplicator = 1.0 ); + */ template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, @@ -139,12 +169,16 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam const RealType& omega = 1.0 ) const; // copy assignment - CSR& operator=( const CSR& matrix ); + SparseMatrix& operator=( const SparseMatrix& matrix ); // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix ); + template< typename Real2, + template< typename, typename > class Segments2, + typename Device2, + typename Index2, + typename RealAllocator2, + typename IndexAllocator2 > + SparseMatrix& operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); void save( File& file ) const; @@ -155,9 +189,13 @@ class SparseMatrix : public Matrix< Real, typename Organization::Device, typenam void load( const String& fileName ); void print( std::ostream& str ) const; + + protected: - + ColumnsVectorType columnsVector; }; } // namespace Conatiners -} // namespace TNL \ No newline at end of file +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 2d11bb21e..abfc1619d 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -10,211 +10,599 @@ #pragma once +#include + namespace TNL { namespace Matrices { -template< typename Real, - typename Organization > -static String getSerializationType(); - -template< typename Real, - typename Organization > -String getSerializationTypeVirtual() const; - -template< typename Real, - typename Organization > -void setDimensions( const IndexType rows, - const IndexType columns ); - -template< 
typename Real, - typename Organization > -void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - -template< typename Real, - typename Organization > -IndexType getRowLength( const IndexType row ) const; - -template< typename Real, - typename Organization > + template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix( const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) + : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator ) +{ +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix( const SparseMatrix& m ) + : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector ) +{ +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix( const SparseMatrix&& m ) + : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) ) +{ +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: Matrix< Real, Device, Index, RealAllocator >( rows, columns, 
realAllocator ), columnsVector( indexAllocator ) +{ +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +String +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getSerializationType() +{ + return String( "Matrices::SparseMatrix< " ) + + TNL::getSerializationType< RealType >() + ", " + + TNL::getSerializationType< SegmentsType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +String +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getRowLength( const IndexType row ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, 
+ typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -IndexType getRowLengthFast( const IndexType row ) const; - -template< typename Real, - typename Organization > -IndexType getNonZeroRowLength( const IndexType row ) const; - -template< typename Real, - typename Organization > +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getRowLengthFast( const IndexType row ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getNonZeroRowLength( const IndexType row ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -IndexType getNonZeroRowLengthFast( const IndexType row ) const; - -template< typename Real, - typename Organization > -template< typename Real2, typename Device2, typename Index2 > -void setLike( const CSR< Real2, Device2, Index2 >& matrix ); - -template< typename Real, - typename Organization > -void reset(); - -template< typename Real, - typename Organization > +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getNonZeroRowLengthFast( const IndexType row ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) +{ + +} + 
+template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getNumberOfNonzeroMatrixElements() const +{ +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +reset() +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); - -template< typename Real, - typename Organization > -bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); - -template< typename Real, - typename Organization > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setElementFast( const IndexType row, + const IndexType column, + const RealType& value ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - -template< typename Real, - typename Organization > -bool addElement( const 
IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - -template< typename Real, - typename Organization > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + +} + + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -bool setRowFast( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - -template< typename Real, - typename Organization > -bool setRow( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - - -template< typename Real, - typename Organization > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setRowFast( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +setRow( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ) +{ + +} + + +template< typename Real, + template< typename, 
typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - -template< typename Real, - typename Organization > -bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - - -template< typename Real, - typename Organization > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator ) +{ + +} + + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -RealType getElementFast( const IndexType row, - const IndexType column ) const; - -template< typename Real, - typename Organization > -RealType getElement( const IndexType row, - const IndexType column ) const; +Real +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getElementFast( const IndexType row, + const IndexType column ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, 
+ typename IndexAllocator > +Real +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getElement( const IndexType row, + const IndexType column ) const +{ + +} __cuda_callable__ template< typename Real, - typename Organization > -void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; - -template< typename Real, - typename Organization > + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const +{ + +} + +/*template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -MatrixRow getRow( const IndexType rowIndex ); - -template< typename Real, - typename Organization > +MatrixRow +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getRow( const IndexType rowIndex ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > __cuda_callable__ -ConstMatrixRow getRow( const IndexType rowIndex ) const; - -template< typename Real, - typename Organization > -template< typename Vector > +ConstMatrixRow +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getRow( const IndexType rowIndex ) const +{ + +}*/ + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > __cuda_callable__ -typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - -template< typename Real, - typename Organization > +typename 
Vector::RealType +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > template< typename InVector, typename OutVector > -void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +vectorProduct( const InVector& inVector, + OutVector& outVector ) const // TODO: add const RealType& multiplicator = 1.0 ) - -template< typename Real, - typename Organization > -template< typename Real2, typename Index2 > -void addMatrix( const CSR< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); - -template< typename Real, - typename Organization > +{ + +} + +/*template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > template< typename Real2, typename Index2 > -void getTransposition( const CSR< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); - -template< typename Real, - typename Organization > +void 
+SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + +}*/ + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > template< typename Vector1, typename Vector2 > -bool performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; +bool +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + +} // copy assignment template< typename Real, - typename Organization > -CSR& operator=( const CSR& matrix ); + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +operator=( const SparseMatrix& matrix ) +{ + +} // cross-device copy assignment template< typename Real, - typename Organization > -template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > -CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix ); - -template< typename Real, - typename Organization > -void save( File& file ) const; - -template< typename Real, - typename Organization > -void load( File& file ); - -template< typename Real, - typename Organization > -void save( const String& fileName ) const; - -template< typename Real, - typename Organization > -void load( const String& fileName ); - -template< typename Real, - typename Organization > -void print( std::ostream& str ) const; + template< typename, typename > class Segments, + typename Device, + typename Index, + 
typename RealAllocator, + typename IndexAllocator > + template< typename Real2, + template< typename, typename > class Segments2, + typename Device2, + typename Index2, + typename RealAllocator2, + typename IndexAllocator2 > +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +save( File& file ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +load( File& file ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +save( const String& fileName ) const +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +load( const String& fileName ) +{ + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +print( std::ostream& str ) const +{ + +} } //namespace Matrices -- GitLab 
From b85f28d3dd64e728dd9d4c66b4fc416fa0478b3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 2 Dec 2019 22:23:34 +0100 Subject: [PATCH 006/179] Implementing SparseMatrix. --- src/TNL/Containers/Segments/CSR.h | 25 +++- src/TNL/Containers/Segments/CSR.hpp | 123 ++++++++++++++---- src/TNL/Matrices/Matrix_impl.h | 1 + src/TNL/Matrices/SparseMatrix.h | 16 ++- src/TNL/Matrices/SparseMatrix.hpp | 118 +++++++++++++---- src/TNL/Matrices/Sparse_impl.h | 1 - .../Matrices/SparseMatrixTest_CSR_segments.h | 4 +- 7 files changed, 227 insertions(+), 61 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index f86def78e..2f194c76d 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -44,26 +44,43 @@ class CSR /** * \brief Number segments. */ + __cuda_callable__ IndexType getSize() const; + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ IndexType getStorageSize() const; + __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; /*** * \brief Go over all segments and for each segment element call - * function 'f' with arguments 'args' + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + template< typename Function, typename... Args > void forAll( Function& f, Args... args ) const; + /*** * \brief Go over all segments and perform a reduction in each of them. */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename... 
Args > - void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Args... args ); + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + protected: @@ -75,4 +92,4 @@ class CSR } // namespace Conatiners } // namespace TNL -#include +#include diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ea45b40ba..84a0fcb34 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -11,7 +11,7 @@ #pragma once #include -#include +#include #include namespace TNL { @@ -43,68 +43,147 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index > + template< typename SizesHolder > +void CSR< Device, Index >:: -void setSegmentsCount( const IndexType& size ) +setSizes( const SizesHolder& sizes ) { - this->offsets.setSize( size + 1 ); + this->offsets.setSize( sizes.getSize() + 1 ); + auto view = this->offsets.getView( 0, sizes.getSize() ); + view = sizes; + this->offsets.setElement( sizes.getSize(), 0 ); + this->offsets.template scan< Algorithms::ScanType::Exclusive >(); } template< typename Device, typename Index > - template< typename SizesHolder = OffsetsHolder > +__cuda_callable__ +Index CSR< Device, Index >:: -void setSizes( const SizesHolder& sizes ) +getSize() const { - this->offsets.setSize( sizes.getSize() + 1 ); - auto view = this->offsets.getView( 0, sizes.getSize() ); - view = sizes; - this->offsets.setElement( sizes.getSize>(), 0 ); - this->offsets.scan< Algorithms::ScanType::Exclusive >(); + return 
this->offsets.getSize() - 1; } template< typename Device, typename Index > +__cuda_callable__ +Index CSR< Device, Index >:: -Index getSize() const +getSegmentSize( const IndexType segmentIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +#else + return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); +#endif + } + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSR< Device, Index >:: +getStorageSize() const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ this->getSize() ]; +#else + return offsets.getElement( this->getSize() ); +#endif + } + return offsets[ this->getSize() ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSR< Device, Index >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index > +__cuda_callable__ +void +CSR< Device, Index >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { - return this->offsets.getSize() - 1; } template< typename Device, typename Index > template< typename Function, typename... Args > +void CSR< Device, Index >:: -void forAll( Function& f, Args args ) const +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); - auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; for( IndexType j = begin; j < end; j++ ) - f( i, j, args ); + if( ! f( i, j, args... ) ) + break; }; - Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSR< Device, Index >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, typename Index > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void CSR< Device, Index >:: -void segmentsReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, Real zero, Args args ) +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { const auto offsetsView = this->offsets.getView(); - auto f = [=] __cuda_callable__ ( const IndexType i, f, args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; Real aux( zero ); for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args ) ); + reduction( aux, fetch( i, j, args... ) ); keeper( i, aux ); }; - Algorithms::ParallelFor< Device >::exec( 0, this->getSize(), f ); + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } - +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSR< Device, Index >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) +{ + this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... 
); +} } // namespace Segements } // namespace Conatiners -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 599e5ad33..a93c7a893 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -163,6 +163,7 @@ void Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; + this->values.reset(); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index e266dc66d..7581ef090 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -55,9 +55,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > virtual String getSerializationTypeVirtual() const; - void setDimensions( const IndexType rows, - const IndexType columns ); - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; @@ -85,6 +82,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > bool setElement( const IndexType row, const IndexType column, const RealType& value ); + __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, @@ -189,10 +187,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void load( const String& fileName ); void print( std::ostream& str ) const; - + + __cuda_callable__ + IndexType getPaddingIndex() const; protected: - ColumnsVectorType columnsVector; + ColumnsVectorType columnIndexes; + + SegmentsType segments; + + IndexAllocator indexAlloctor; + + RealAllocator realAllocator; }; } // namespace Conatiners diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index abfc1619d..1ccb602ef 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -24,7 +24,7 @@ namespace Matrices { SparseMatrix< Real, Segments, Device, Index, 
RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) - : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnsVector( indexAllocator ) + : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) { } @@ -36,7 +36,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix& m ) - : Matrix< Real, Device, Index, RealAllocator >( m ), columnsVector( m.columnsVector ) + : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) { } @@ -48,7 +48,7 @@ template< typename Real, typename IndexAllocator > SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix&& m ) - : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnsVector( std::move( m.columnsVector ) ) + : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) { } @@ -63,10 +63,10 @@ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) -: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnsVector( indexAllocator ) +: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { } - + template< typename Real, template< typename, typename > class Segments, typename Device, @@ -96,20 +96,6 @@ getSerializationTypeVirtual() const return this->getSerializationType(); } -template< typename Real, - template< typename, typename > class Segments, - typename Device, - typename Index, - typename RealAllocator, - typename IndexAllocator > -void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -setDimensions( const IndexType rows, - const 
IndexType columns ) -{ - -} - template< typename Real, template< typename, typename > class Segments, typename Device, @@ -120,7 +106,12 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { - + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); + this->segments.setSizes( rowLengths ); + this->values.setSize( this->segments.getStorageSize() ); + this->values = ( RealType ) 0; + this->columnIndexes.setSize( this->segments.getStorageSize() ); + this->columnIndexes = this->getPaddingIndex(); } template< typename Real, @@ -188,7 +179,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { - + Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, @@ -213,7 +204,9 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: reset() { - + Matrix< Real, Device, Index >::reset(); + this->columnIndexes.reset(); + } template< typename Real, @@ -229,7 +222,7 @@ setElementFast( const IndexType row, const IndexType column, const RealType& value ) { - + return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, @@ -244,7 +237,7 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) { - + return this->addElement( row, column, value, 0.0 ); } template< typename Real, @@ -277,7 +270,56 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - + TNL_ASSERT( row >= 0 && row < this->rows && + column >= 0 && column < this->columns, + std::cerr << " row = " << row + << " column = " << column + << " this->rows = " << this->rows + << " this->columns = " << this->columns ); + + const IndexType rowSize = 
this->segments.getSegmentSize( row ); + IndexType col( this->getPaddingIndex() ); + IndexType i; + IndexType globalIdx; + for( i = 0; i < rowSize; i++ ) + { + globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + { + this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); + return true; + } + if( col == this->getPaddingIndex() || col > column ) + break; + } + if( i == rowSize ) + return false; + if( col == this->getPaddingIndex() ) + { + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } + else + { + IndexType j = rowSize - 1; + while( j > i ) + { + const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); + const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); + TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); + TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); + this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); + this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); + j--; + } + + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } } @@ -377,16 +419,25 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { - + const IndexType rowSize = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowSize; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + const IndexType col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + return this->values.getElement( globalIdx ); + } + return 0.0; } 
-__cuda_callable__ template< typename Real, template< typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > +__cuda_callable__ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowFast( const IndexType row, @@ -604,6 +655,19 @@ print( std::ostream& str ) const } +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getPaddingIndex() const +{ + return -1; +} } //namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h index dda95e68b..889d92e62 100644 --- a/src/TNL/Matrices/Sparse_impl.h +++ b/src/TNL/Matrices/Sparse_impl.h @@ -75,7 +75,6 @@ template< typename Real, void Sparse< Real, Device, Index >::reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); this->columnIndexes.reset(); } diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 00654de3c..a14148151 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -108,7 +108,7 @@ TYPED_TEST( CSRMatrixTest, addElementTest ) test_AddElement< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setRowTest ) +/*TYPED_TEST( CSRMatrixTest, setRowTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -134,7 +134,7 @@ TYPED_TEST( CSRMatrixTest, printTest ) using CSRMatrixType = typename TestFixture::CSRMatrixType; test_Print< CSRMatrixType >(); -} +}*/ #endif -- GitLab From 176fedf929095d5b5ae9011b1fd313ac1e9d496a Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Dec 2019 10:48:45 +0100 Subject: [PATCH 007/179] Implementing vector product in SparseMatrix. 
--- src/TNL/Containers/Segments/CSR.hpp | 5 +- src/TNL/Matrices/SparseMatrix.h | 16 ++- src/TNL/Matrices/SparseMatrix.hpp | 107 +++++++++--------- .../Matrices/SparseMatrixTest_CSR_segments.h | 6 +- 4 files changed, 65 insertions(+), 69 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 84a0fcb34..4dcccac24 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -163,11 +163,12 @@ void CSR< Device, Index >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) { - const auto offsetsView = this->offsets.getView(); + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; - Real aux( zero ); + RealType aux( zero ); for( IndexType j = begin; j < end; j++ ) reduction( aux, fetch( i, j, args... 
) ); keeper( i, aux ); diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 7581ef090..6b6a58f9a 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -133,22 +133,20 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > IndexType* columns, RealType* values ) const; - /*__cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); - - __cuda_callable__ - ConstMatrixRow getRow( const IndexType rowIndex ) const;*/ - template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; + /*** + * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; - // TODO: add const RealType& multiplicator = 1.0 ) + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& inVectorAddition = 0.0 ) const; /*template< typename Real2, typename Index2 > void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, @@ -170,7 +168,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > SparseMatrix& operator=( const SparseMatrix& matrix ); // cross-device copy assignment - template< typename Real2, + template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 1ccb602ef..c89aeac17 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -64,7 +64,7 @@ SparseMatrix( const IndexType rows, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) -{ +{ } template< typename 
Real, @@ -124,7 +124,7 @@ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowLength( const IndexType row ) const { - + } template< typename Real, @@ -138,7 +138,7 @@ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getRowLengthFast( const IndexType row ) const { - + } template< typename Real, @@ -151,7 +151,7 @@ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getNonZeroRowLength( const IndexType row ) const { - + } template< typename Real, @@ -165,7 +165,7 @@ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getNonZeroRowLengthFast( const IndexType row ) const { - + } template< typename Real, @@ -254,7 +254,7 @@ addElementFast( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - + } template< typename Real, @@ -315,7 +315,7 @@ addElement( const IndexType row, this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); j--; } - + this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); return true; @@ -337,7 +337,6 @@ setRowFast( const IndexType row, const RealType* values, const IndexType elements ) { - } template< typename Real, @@ -353,7 +352,19 @@ setRow( const IndexType row, const RealType* values, const IndexType elements ) { - + const IndexType rowLength = this->segments.getSegmentSize( row ); + if( elements > rowLength ) + return false; + + for( IndexType i = 0; i < elements; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] ); + this->values.setElement( globalIdx, values[ i ] ); + } + for( IndexType i = elements; i < rowLength; i++ ) + this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() ); + return true; } @@ -372,7 +383,7 @@ addRowFast( const IndexType row, const IndexType 
numberOfElements, const RealType& thisElementMultiplicator ) { - + } template< typename Real, @@ -389,7 +400,7 @@ addRow( const IndexType row, const IndexType numberOfElements, const RealType& thisElementMultiplicator ) { - + } @@ -405,7 +416,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getElementFast( const IndexType row, const IndexType column ) const { - + } template< typename Real, @@ -444,37 +455,9 @@ getRowFast( const IndexType row, IndexType* columns, RealType* values ) const { - -} -/*template< typename Real, - template< typename, typename > class Segments, - typename Device, - typename Index, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -MatrixRow -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -getRow( const IndexType rowIndex ) -{ - } -template< typename Real, - template< typename, typename > class Segments, - typename Device, - typename Index, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -ConstMatrixRow -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -getRow( const IndexType rowIndex ) const -{ - -}*/ - template< typename Real, template< typename, typename > class Segments, typename Device, @@ -488,7 +471,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { - + } template< typename Real, @@ -502,10 +485,24 @@ template< typename InVector, void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: vectorProduct( const InVector& inVector, - OutVector& outVector ) const -// TODO: add const RealType& multiplicator = 1.0 ) -{ - + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& inVectorAddition ) const +{ + auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + auto valuesView = this->values.getConstView(); + auto 
columnIndexesView = this->columnIndexes.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { + return valuesView[ offset ] * inVectorView[ columnIndexesView[ offset ] ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + const_cast< SegmentsType* >( &this->segments )->segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } /*template< typename Real, @@ -521,7 +518,7 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { - + } template< typename Real, @@ -536,7 +533,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { - + }*/ template< typename Real, @@ -553,7 +550,7 @@ performSORIteration( const Vector1& b, Vector2& x, const RealType& omega ) const { - + } // copy assignment @@ -567,7 +564,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix& matrix ) { - + } // cross-device copy assignment @@ -587,7 +584,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { - + } template< typename Real, @@ -600,7 +597,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: save( File& file ) const { - + } template< typename Real, @@ -613,7 +610,7 @@ void SparseMatrix< Real, Segments, 
Device, Index, RealAllocator, IndexAllocator >:: load( File& file ) { - + } template< typename Real, @@ -626,7 +623,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: save( const String& fileName ) const { - + } template< typename Real, @@ -639,7 +636,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: load( const String& fileName ) { - + } template< typename Real, @@ -652,7 +649,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { - + } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index a14148151..4443d7f6c 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -41,7 +41,7 @@ using CSRMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, long > -#ifdef HAVE_CUDA +#ifdef HAVE_CUDA ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, @@ -108,7 +108,7 @@ TYPED_TEST( CSRMatrixTest, addElementTest ) test_AddElement< CSRMatrixType >(); } -/*TYPED_TEST( CSRMatrixTest, setRowTest ) +TYPED_TEST( CSRMatrixTest, setRowTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -122,7 +122,7 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest ) test_VectorProduct< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +/*TYPED_TEST( CSRMatrixTest, 
saveAndLoadTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; -- GitLab From bc152d3f45b048c90a35a0d8d8930a67fbf95ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Tue, 3 Dec 2019 11:13:28 +0100 Subject: [PATCH 008/179] Fixed const in segmentsReduction --- src/TNL/Containers/Segments/CSR.h | 2 +- src/TNL/Containers/Segments/CSR.hpp | 6 +++--- src/TNL/Matrices/SparseMatrix.hpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 2f194c76d..92b4f3949 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -76,7 +76,7 @@ class CSR * \brief Go over all segments and perform a reduction in each of them. */ template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > - void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 4dcccac24..e2fd099ae 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -161,11 +161,11 @@ template< typename Device, template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: -segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); - auto offsetsView = this->offsets.getConstView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index c89aeac17..067f36001 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -502,7 +502,7 @@ vectorProduct( const InVector& inVector, auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; }; - const_cast< SegmentsType* >( &this->segments )->segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } /*template< typename Real, -- GitLab From ff7c9054e2205808d109d3349efefd748b2f809b Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Dec 2019 12:58:55 +0100 Subject: [PATCH 009/179] All tests passed for SparseMatrix using Segments. 
--- src/TNL/Containers/Segments/CSR.h | 5 +++- src/TNL/Containers/Segments/CSR.hpp | 21 ++++++++++++++- src/TNL/Matrices/SparseMatrix.hpp | 27 +++++++++++++++---- .../Matrices/SparseMatrixTest_CSR_segments.h | 6 ++--- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 92b4f3949..e3eff2342 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -79,8 +79,11 @@ class CSR void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > - void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ); + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + void save( File& file ) const; + + void load( File& file ); protected: diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index e2fd099ae..c99611958 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -181,10 +181,29 @@ template< typename Device, template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void CSR< Device, Index >:: -allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... 
); } + +template< typename Device, + typename Index > +void +CSR< Device, Index >:: +save( File& file ) const +{ + file << this->offsets; +} + +template< typename Device, + typename Index > +void +CSR< Device, Index >:: +load( File& file ) +{ + file >> this->offsets; +} + } // namespace Segements } // namespace Conatiners } // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 067f36001..a43ddba82 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -597,7 +597,9 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: save( File& file ) const { - + Matrix< RealType, DeviceType, IndexType >::save( file ); + file << this->columnIndexes; + this->segments.save( file ); } template< typename Real, @@ -610,7 +612,9 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: load( File& file ) { - + Matrix< RealType, DeviceType, IndexType >::load( file ); + file >> this->columnIndexes; + this->segments.load( file ); } template< typename Real, @@ -623,7 +627,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: save( const String& fileName ) const { - + Object::save( fileName ); } template< typename Real, @@ -636,7 +640,7 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: load( const String& fileName ) { - + Object::load( fileName ); } template< typename Real, @@ -649,7 +653,20 @@ void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { - + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + const IndexType rowLength = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowLength; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + const IndexType column = this->columnIndexes.getElement( globalIdx ); + if( column 
== this->getPaddingIndex() ) + break; + str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t"; + } + str << std::endl; + } } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 4443d7f6c..a738af0e2 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -122,11 +122,11 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest ) test_VectorProduct< CSRMatrixType >(); } -/*TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" ); + test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" ); } TYPED_TEST( CSRMatrixTest, printTest ) @@ -134,7 +134,7 @@ TYPED_TEST( CSRMatrixTest, printTest ) using CSRMatrixType = typename TestFixture::CSRMatrixType; test_Print< CSRMatrixType >(); -}*/ +} #endif -- GitLab From 2bd090cccf878ce1cede67d855d15e0ce0b3e6d2 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Dec 2019 20:46:36 +0100 Subject: [PATCH 010/179] Added Ellpack segments. 
--- src/TNL/Containers/Segments/CSR.h | 5 +- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.h | 98 ++++++++++++ src/TNL/Containers/Segments/Ellpack.hpp | 190 ++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 5 deletions(-) create mode 100644 src/TNL/Containers/Segments/Ellpack.h create mode 100644 src/TNL/Containers/Segments/Ellpack.hpp diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index e3eff2342..52ca36e22 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -16,7 +16,6 @@ namespace TNL { namespace Containers { namespace Segments { - template< typename Device, typename Index > class CSR @@ -36,7 +35,7 @@ class CSR CSR( const CSR&& segments ); /** - * \brief Set sizes of particular segmenets. + * \brief Set sizes of particular segments. */ template< typename SizesHolder = OffsetsHolder > void setSizes( const SizesHolder& sizes ); @@ -88,9 +87,7 @@ class CSR protected: OffsetsHolder offsets; - }; - } // namespace Segements } // namespace Conatiners } // namespace TNL diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index c99611958..ecd52190c 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -136,7 +136,7 @@ CSR< Device, Index >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; for( IndexType j = begin; j < end; j++ ) diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h new file mode 100644 index 000000000..49f859afb --- /dev/null +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -0,0 +1,98 @@ +/*************************************************************************** + Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + int Alignment = 32 > +class Ellpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + static constexpr int getAlignment() { return Alignment; } + + Ellpack(); + + Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + Ellpack( const Ellpack& segments ); + + Ellpack( const Ellpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSizes( const SizesHolder& sizes ); + + /** + * \brief Number segments. 
+ */ + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp new file mode 100644 index 000000000..0b6240514 --- /dev/null +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -0,0 +1,190 @@ +/*************************************************************************** + Ellpack.hpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index > +Ellpack< Device, Index >:: +Ellpack() : size( 0 ), rowLength( 0 ) +{ +} + +template< typename Device, + typename Index > +Ellpack< Device, Index >:: +Ellpack( const Ellpack& ellpack ) : offsets( ellpack.offsets ) +{ +} + +template< typename Device, + typename Index > +Ellpack< Device, Index >:: +Ellpack( const Ellpack&& ellpack ) : offsets( std::move( ellpack.offsets ) ) +{ + +} + +template< typename Device, + typename Index > + template< typename SizesHolder > +void +Ellpack< Device, Index >:: +setSizes( const SizesHolder& sizes ) +{ + this->segmentSize = max( sizes ); + this->size = sizes.getSize(); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +Ellpack< Device, Index >:: +getSize() const +{ + return this->offsets.getSize() - 1; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +Ellpack< Device, Index >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index > 
+__cuda_callable__ +Index +Ellpack< Device, Index >:: +getStorageSize() const +{ + return this->size * this->segmentSize; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +Ellpack< Device, Index >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index > +__cuda_callable__ +void +Ellpack< Device, Index >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +Ellpack< Device, Index >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto offsetsView = this->offsets.getView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + for( IndexType j = begin; j < end; j++ ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +Ellpack< Device, Index >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +Ellpack< Device, Index >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +Ellpack< Device, Index >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); +} + +template< typename Device, + typename Index > +void +Ellpack< Device, Index >:: +save( File& file ) const +{ + file << this->offsets; +} + +template< typename Device, + typename Index > +void +Ellpack< Device, Index >:: +load( File& file ) +{ + file >> this->offsets; +} + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL -- GitLab From 3abf57d2212c9290f44fe2521d9c481370d6964c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 3 Dec 2019 21:36:17 +0100 Subject: [PATCH 011/179] Implementing Ellpack segments. 
--- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.h | 1 + src/TNL/Containers/Segments/Ellpack.hpp | 156 ++++++++++++------ src/UnitTests/Matrices/CMakeLists.txt | 6 + .../Matrices/SparseMatrixTest_CSR_segments.h | 4 +- .../SparseMatrixTest_Ellpack_segments.cpp | 1 + .../SparseMatrixTest_Ellpack_segments.cu | 1 + .../SparseMatrixTest_Ellpack_segments.h | 141 ++++++++++++++++ 8 files changed, 255 insertions(+), 57 deletions(-) create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ecd52190c..b40524e5e 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -204,6 +204,6 @@ load( File& file ) file >> this->offsets; } - } // namespace Segements + } // namespace Segments } // namespace Conatiners } // namespace TNL diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 49f859afb..772566f51 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -18,6 +18,7 @@ namespace TNL { template< typename Device, typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int Alignment = 32 > class Ellpack { diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 0b6240514..42d7eb8c1 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -20,128 +20,170 @@ namespace TNL { template< typename Device, - typename Index > -Ellpack< Device, Index >:: -Ellpack() : size( 0 ), rowLength( 0 ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack() + : segmentSize( 0 ), size( 0 ), 
alignedSize( 0 ) { } template< typename Device, - typename Index > -Ellpack< Device, Index >:: -Ellpack( const Ellpack& ellpack ) : offsets( ellpack.offsets ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { } template< typename Device, - typename Index > -Ellpack< Device, Index >:: -Ellpack( const Ellpack&& ellpack ) : offsets( std::move( ellpack.offsets ) ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { - } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename SizesHolder > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: setSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); this->size = sizes.getSize(); + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( size / this->getAlignment() ) * this->getAlignment(); } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getSize() const { - return this->offsets.getSize() - 1; + return this->size; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const { return this->segmentSize; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ 
Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getStorageSize() const { - return this->size * this->segmentSize; + return this->alignedSize * this->segmentSize; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { - if( ! std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ segmentIdx ] + localIdx; -#else - return offsets.getElement( segmentIdx ) + localIdx; -#endif - } - return offsets[ segmentIdx ] + localIdx; + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { - const IndexType begin = offsetsView[ i ]; - const IndexType end = offsetsView[ i + 1 ]; - for( IndexType j = begin; j < end; j++ ) - if( ! f( i, j, args... ) ) - break; - }; - Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + for( IndexType j = begin; j < end; j++ ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = i; + const IndexType end = storageSize; + for( IndexType j = begin; j < end; j += alignedSize ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -158,33 +200,39 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: save( File& file ) const { file << this->offsets; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: load( File& file ) { file >> this->offsets; } - } // namespace Segements + } // namespace Segments } // namespace Conatiners } // namespace TNL diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index f278934a6..996dd0430 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -74,6 +74,11 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + + ENDIF( BUILD_CUDA ) @@ -92,6 +97,7 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT #### # Segments tests ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff 
--git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index a738af0e2..b53358469 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -1,8 +1,8 @@ /*************************************************************************** SparseMatrixTest_CSR.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 2, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp new file mode 100644 index 000000000..63219e9b0 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu new file mode 100644 index 000000000..63219e9b0 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h new file mode 100644 index 000000000..79cdf06cf --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -0,0 +1,141 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "SparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class EllpackMatrixTest : public ::testing::Test +{ +protected: + using EllpackMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using EllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + 
TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes); + +TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetDimensions< EllpackMatrixType >(); +} + +//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using EllpackMatrixType = typename TestFixture::EllpackMatrixType; +// +//// test_SetCompressedRowLengths< EllpackMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( EllpackMatrixTest, setLikeTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetLike< EllpackMatrixType, EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, resetTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Reset< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, addElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_AddElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetRow< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, vectorProductTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_VectorProduct< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" ); +} + +TYPED_TEST( EllpackMatrixTest, printTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Print< EllpackMatrixType >(); +} + +#endif + +#include "../main.h" -- GitLab From 322ac56e385cc398fb44a2cef78067d2cdaf82b6 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 4 Dec 2019 18:08:16 +0100 Subject: [PATCH 012/179] Ellpack segments work well. 
--- src/TNL/Containers/Segments/Ellpack.h | 1 + src/TNL/Containers/Segments/Ellpack.hpp | 50 +++++++++++----- src/TNL/Matrices/SparseMatrix.hpp | 12 ++-- src/UnitTests/Matrices/CMakeLists.txt | 3 + .../SparseMatrixTest_Ellpack_segments.h | 59 +++++++++++-------- 5 files changed, 83 insertions(+), 42 deletions(-) diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 772566f51..dc1a717b3 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -28,6 +28,7 @@ class Ellpack using IndexType = Index; using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } Ellpack(); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 42d7eb8c1..8a23693ec 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -63,7 +63,7 @@ setSizes( const SizesHolder& sizes ) if( RowMajorOrder ) this->alignedSize = this->size; else - this->alignedSize = roundUpDivision( size / this->getAlignment() ) * this->getAlignment(); + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); } template< typename Device, @@ -186,17 +186,35 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); - const auto offsetsView = this->offsets.getConstView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = offsetsView[ i ]; - const IndexType end = offsetsView[ i + 1 ]; - RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... 
) ); - keeper( i, aux ); - }; - Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + if( RowMajorOrder ) + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j += alignedSize ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); + } } template< typename Device, @@ -219,7 +237,9 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: save( File& file ) const { - file << this->offsets; + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); } template< typename Device, @@ -230,7 +250,9 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: load( File& file ) { - file >> this->offsets; + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); } } // namespace Segments diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index a43ddba82..9bc8d7fb7 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -489,12 +489,16 @@ vectorProduct( const InVector& inVector, const RealType& matrixMultiplicator, const RealType& inVectorAddition ) const { - auto inVectorView = inVector.getConstView(); + const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); - auto valuesView = this->values.getConstView(); - auto columnIndexesView = this->columnIndexes.getConstView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { - return valuesView[ offset ] * inVectorView[ columnIndexesView[ offset ] ]; + const IndexType column = columnIndexesView[ offset ]; + if( column == paddingIndex ) + return 0.0; + return valuesView[ offset ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 996dd0430..ef1f04371 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -31,6 +31,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments 
SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 79cdf06cf..c54aab948 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -26,34 +26,45 @@ protected: using EllpackMatrixType = Matrix; }; +//// +// Row-major format is used for the host system +template< typename Device, typename Index > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >; + // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, 
TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - 
TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, long > #endif >; -- GitLab From 2a37be3b4c0d2af5b8d82dd930285cbac8f29af2 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 5 Dec 2019 20:29:06 +0100 Subject: [PATCH 
013/179] Implementing SlicedEllpack segments. --- src/TNL/Containers/Segments/Ellpack.h | 2 +- src/TNL/Containers/Segments/Ellpack.hpp | 18 ++ src/TNL/Containers/Segments/SlicedEllpack.h | 102 +++++++ src/TNL/Containers/Segments/SlicedEllpack.hpp | 270 ++++++++++++++++++ .../Matrices/SparseMatrixTest_CSR_segments.h | 2 +- .../SparseMatrixTest_Ellpack_segments.h | 2 +- ...parseMatrixTest_SlicedEllpack_segments.cpp | 1 + ...SparseMatrixTest_SlicedEllpack_segments.cu | 1 + .../SparseMatrixTest_SlicedEllpack_segments.h | 152 ++++++++++ 9 files changed, 547 insertions(+), 3 deletions(-) create mode 100644 src/TNL/Containers/Segments/SlicedEllpack.h create mode 100644 src/TNL/Containers/Segments/SlicedEllpack.hpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index dc1a717b3..d99ffe336 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -26,7 +26,6 @@ class Ellpack using DeviceType = Device; using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; static constexpr int getAlignment() { return Alignment; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } @@ -44,6 +43,7 @@ class Ellpack template< typename SizesHolder = OffsetsHolder > void setSizes( const SizesHolder& sizes ); + void setSizes( const IndexType segmentsCount, const IndexType segmentSize ); /** * \brief Number segments. 
*/ diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 8a23693ec..833b162eb 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -66,6 +66,24 @@ setSizes( const SizesHolder& sizes ) this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); } +/** Uniform-size overload: stores the segment count and the common segment size; alignedSize equals size for row-major order and is size rounded up to a multiple of the alignment otherwise. Not a member template, matching the declaration in Ellpack.h. */ +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +setSizes( const IndexType segmentsCount, const IndexType segmentSize ) +{ + this->segmentSize = segmentSize; + this->size = segmentsCount; + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); +} + + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h new file mode 100644 index 000000000..a5ef9d121 --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -0,0 +1,102 @@ +/*************************************************************************** + SlicedEllpack.h - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr 
bool getRowMajorOrder() { return RowMajorOrder; } + + SlicedEllpack(); + + SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + SlicedEllpack( const SlicedEllpack& segments ); + + SlicedEllpack( const SlicedEllpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSizes( const SizesHolder& sizes ); + + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size; /* number of segments, the value returned by getSize() */ + + OffsetsHolder sliceOffsets; /* per-slice offsets; exact layout not fixed yet because setSizes is unfinished - TODO confirm */ +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp new file mode 100644 index 000000000..60d2059fe --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -0,0 +1,270 @@ +/*************************************************************************** + SlicedEllpack.hpp - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack() + : size( 0 ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack( const SlicedEllpack& slicedEllpack ) + : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack( const SlicedEllpack&& slicedEllpack ) + : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename SizesHolder > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +setSizes( const SizesHolder& sizes ) +{ + this->size = sizes.getSize(); + const IndexType 
segmentsCount = roundUpDivision( this->size, getSliceSize() ); + this->segmentOffsets.setSize( segmentsCount + 1 ); + Ellpack< DeviceType, IndexType, true > ellpack; + ellpack.setSizes( segmentsCount, SliceSize ); + ... + + + + + + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( size, this->getSliceSize() ) * this->getSliceSize(); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getSize() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getStorageSize() const +{ + return this->alignedSize * this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... 
Args > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto offsetsView = this->offsets.getView(); + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + for( IndexType j = begin; j < end; j++ ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = i; + const IndexType end = storageSize; + for( IndexType j = begin; j < end; j += alignedSize ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + if( RowMajorOrder ) + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j += alignedSize ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +save( File& file ) const +{ + /* Persist exactly the members SlicedEllpack.h declares: size and sliceOffsets. */ + file.save( &size ); + file << this->sliceOffsets; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +load( File& file ) +{ + /* Must read the members in the same order save() writes them. */ + file.load( &size ); + file >> this->sliceOffsets; +} + + } // namespace Segments + } // namespace Containers +} // namespace TNL diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index b53358469..bf4e452fa 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_CSR.h - description + SparseMatrixTest_CSR_segments.h - description ------------------- begin : Dec 2, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index c54aab948..edfe0bc28 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_Ellpack.h - description + SparseMatrixTest_Ellpack_segments.h - description ------------------- begin : Dec 3, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp new file mode 100644 index 000000000..a88301100 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu new file mode 100644 index 000000000..a88301100 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h new file mode 100644 index 000000000..8d17b8be7 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -0,0 +1,152 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack_segments.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "SparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class SlicedEllpackMatrixTest : public ::testing::Test +{ +protected: + using SlicedEllpackMatrixType = Matrix; +}; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >; + +// types for which MatrixTest is instantiated +using SlicedEllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, 
TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); + +TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetDimensions< SlicedEllpackMatrixType >(); +} + +//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; +// +//// test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, resetTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Reset< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, setElementTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, addElementTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_AddElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, setRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetRow< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_VectorProduct< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" ); +} + +TYPED_TEST( SlicedEllpackMatrixTest, printTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Print< SlicedEllpackMatrixType >(); +} + +#endif + +#include "../main.h" -- GitLab From 47d413b3623fff9e2b733955bd55481a430d8d93 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 6 Dec 2019 
16:30:30 +0100 Subject: [PATCH 014/179] Added Segments unit tests. --- src/TNL/Containers/Segments/CSR.h | 19 +++- src/TNL/Containers/Segments/CSR.hpp | 28 +++++- src/TNL/Containers/Segments/Ellpack.h | 15 ++- src/TNL/Containers/Segments/Ellpack.hpp | 42 +++++++- src/TNL/Matrices/SparseMatrix.hpp | 2 +- src/UnitTests/Containers/CMakeLists.txt | 2 + .../Containers/Segments/CMakeLists.txt | 52 ++++++++++ .../Containers/Segments/SegmentsTest.hpp | 95 +++++++++++++++++++ .../Containers/Segments/SegmentsTest_CSR.cpp | 1 + .../Containers/Segments/SegmentsTest_CSR.cu | 1 + .../Containers/Segments/SegmentsTest_CSR.h | 57 +++++++++++ .../Segments/SegmentsTest_Ellpack.cpp | 1 + .../Segments/SegmentsTest_Ellpack.cu | 1 + .../Segments/SegmentsTest_Ellpack.h | 65 +++++++++++++ 14 files changed, 364 insertions(+), 17 deletions(-) create mode 100644 src/UnitTests/Containers/Segments/CMakeLists.txt create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest.hpp create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_CSR.h create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 52ca36e22..ecd1de983 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -25,10 +25,11 @@ class CSR using DeviceType = Device; using IndexType = Index; using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; CSR(); - CSR( const Vector< IndexType, DeviceType, IndexType >& sizes ); + CSR( const SegmentsSizes& sizes ); CSR( const CSR& segments ); @@ -38,17 +39,29 @@ class CSR * \brief Set sizes 
of particular segments. */ template< typename SizesHolder = OffsetsHolder > - void setSizes( const SizesHolder& sizes ); + void setSegmentsSizes( const SizesHolder& sizes ); /** * \brief Number segments. */ __cuda_callable__ - IndexType getSize() const; + IndexType getSegmentsCount() const; + /*** + * \brief Returns size of the segment number \r segmentIdx + */ __cuda_callable__ IndexType getSegmentSize( const IndexType segmentIdx ) const; + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that needs to be allocated. + */ __cuda_callable__ IndexType getStorageSize() const; diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index b40524e5e..677cd1b00 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -26,6 +26,14 @@ CSR() { } +template< typename Device, + typename Index > +CSR< Device, Index >:: +CSR( const SegmentsSizes& segmentsSizes ) +{ + this->setSegmentsSizes( segmentsSizes ); +} + template< typename Device, typename Index > CSR< Device, Index >:: @@ -46,7 +54,7 @@ template< typename Device, template< typename SizesHolder > void CSR< Device, Index >:: -setSizes( const SizesHolder& sizes ) +setSegmentsSizes( const SizesHolder& sizes ) { this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); @@ -60,7 +68,7 @@ template< typename Device, __cuda_callable__ Index CSR< Device, Index >:: -getSize() const +getSegmentsCount() const { return this->offsets.getSize() - 1; } @@ -83,6 +91,16 @@ getSegmentSize( const IndexType segmentIdx ) const return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; } +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSR< Device, Index >:: +getSize() const +{ + return this->getStorageSize(); +} + template< typename Device, typename Index > __cuda_callable__ @@ -93,12 
+111,12 @@ getStorageSize() const if( ! std::is_same< DeviceType, Devices::Host >::value ) { #ifdef __CUDA_ARCH__ - return offsets[ this->getSize() ]; + return offsets[ this->getSegmentsCount() ]; #else - return offsets.getElement( this->getSize() ); + return offsets.getElement( this->getSegmentsCount() ); #endif } - return offsets[ this->getSize() ]; + return offsets[ this->getSegmentsCount() ]; } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index d99ffe336..b08ad0f04 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -28,10 +28,14 @@ class Ellpack using IndexType = Index; static constexpr int getAlignment() { return Alignment; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; Ellpack(); - Ellpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + Ellpack( const SegmentsSizes& sizes ); + + Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); Ellpack( const Ellpack& segments ); @@ -41,18 +45,21 @@ class Ellpack * \brief Set sizes of particular segments. */ template< typename SizesHolder = OffsetsHolder > - void setSizes( const SizesHolder& sizes ); + void setSegmentsSizes( const SizesHolder& sizes ); - void setSizes( const IndexType segmentsCount, const IndexType segmentSize ); + void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); /** * \brief Number segments. 
*/ __cuda_callable__ - IndexType getSize() const; + IndexType getSegmentsCount() const; __cuda_callable__ IndexType getSegmentSize( const IndexType segmentIdx ) const; + __cuda_callable__ + IndexType getSize() const; + __cuda_callable__ IndexType getStorageSize() const; diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 833b162eb..e855d0d9a 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -29,6 +29,28 @@ Ellpack() { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const SegmentsSizes& segmentsSizes ) + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ + this->setSegmentsSizes( segmentsSizes ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ + this->setSegmentsSizes( segmentsCount, segmentSize ); +} + template< typename Device, typename Index, bool RowMajorOrder, @@ -56,7 +78,7 @@ template< typename Device, template< typename SizesHolder > void Ellpack< Device, Index, RowMajorOrder, Alignment >:: -setSizes( const SizesHolder& sizes ) +setSegmentsSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); this->size = sizes.getSize(); @@ -70,10 +92,9 @@ template< typename Device, typename Index, bool RowMajorOrder, int Alignment > - template< typename SizesHolder > void Ellpack< Device, Index, RowMajorOrder, Alignment >:: -setSizes( const IndexType segmentsCount, const IndexType segmentSize ); +setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) { this->segmentSize = segmentSize; this->size = segmentsCount; @@ -91,7 +112,7 @@ template< typename Device, __cuda_callable__ Index Ellpack< Device, Index, 
RowMajorOrder, Alignment >:: -getSize() const +getSegmentsCount() const { return this->size; } @@ -108,6 +129,19 @@ getSegmentSize( const IndexType segmentIdx ) const return this->segmentSize; } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +getSize() const +{ + return this->size * this->segmentSize; +} + + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 9bc8d7fb7..37f59c058 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -107,7 +107,7 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); - this->segments.setSizes( rowLengths ); + this->segments.setSegmentsSizes( rowLengths ); this->values.setSize( this->segments.getStorageSize() ); this->values = ( RealType ) 0; this->columnIndexes.setSize( this->segments.getStorageSize() ); diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 9f27aaa86..227a86551 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -1,3 +1,5 @@ +ADD_SUBDIRECTORY( Segments ) + ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} ) diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt new file mode 100644 index 000000000..6304a4998 --- /dev/null +++ b/src/UnitTests/Containers/Segments/CMakeLists.txt @@ -0,0 +1,52 @@ +IF( BUILD_CUDA ) +# CUDA_ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cu 
OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + +# CUDA_ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + +# CUDA_ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + +# CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ELSE( BUILD_CUDA ) +# ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + +# ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + +# ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( SegmentsTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cpp ) + 
TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + +# ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) +ENDIF( BUILD_CUDA ) + + +#ADD_TEST( SegmentsTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( SegmentsTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) + diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp new file mode 100644 index 000000000..9aa7fb94f --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -0,0 +1,95 @@ +/*************************************************************************** + SegmentsTest.hpp - description + ------------------- + begin : Dec 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include + +#ifdef HAVE_GTEST +#include + +template< typename Segments > +void test_SetSegmentsSizes_EqualSizes() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType segmentSize = 5; + TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); + segmentsSizes = 5;//segmentSize; + + Segments segments( segmentsSizes ); + + EXPECT_EQ( segments.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments.getSegmentSize( i ), segmentSize ); + + Segments segments2( segments ); + EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments2.getSize(), segments2.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize ); + + Segments segments3; + segments3.setSegmentsSizes( segmentsSizes ); + + EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments3.getSize(), segments3.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); +} + +template< typename Segments > +void test_SetSegmentsSizes_EqualSizes_EllpackOnly() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType segmentSize = 5; + + Segments segments( segmentsCount, segmentSize ); + + 
EXPECT_EQ( segments.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments.getSegmentSize( i ), segmentSize ); + + Segments segments2( segments ); + EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments2.getSize(), segments2.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize ); + + Segments segments3; + segments3.setSegmentsSizes( segmentsCount, segmentSize ); + + EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments3.getSize(), segments3.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); +} + +#endif diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp new file mode 100644 index 000000000..02edac332 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_CSR.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu new file mode 100644 index 000000000..02edac332 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu @@ -0,0 +1 @@ +#include "SegmentsTest_CSR.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h new file mode 100644 index 000000000..e92b7c738 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h @@ -0,0 +1,57 @@ +/*************************************************************************** + SegmentsTest_CSR.h - description + 
------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include + +#include "SegmentsTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Segments > +class CSRSegmentsTest : public ::testing::Test +{ +protected: + using CSRSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using CSRSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, long >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, long >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, +#endif +>; + +TYPED_TEST_SUITE( CSRSegmentsTest, CSRSegmentsTypes ); + +TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using CSRSegmentsType = typename TestFixture::CSRSegmentsType; + + test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp new file mode 100644 index 000000000..120a25103 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_Ellpack.h" diff --git 
a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu new file mode 100644 index 000000000..120a25103 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu @@ -0,0 +1 @@ +#include "SegmentsTest_Ellpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h new file mode 100644 index 000000000..d484fd27d --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h @@ -0,0 +1,65 @@ +/*************************************************************************** + SegmentsTest_Ellpack.h - description + ------------------- + begin : Dec 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include + +#include "SegmentsTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Segments > +class EllpackSegmentsTest : public ::testing::Test +{ +protected: + using EllpackSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using EllpackSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, + 
TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, +#endif +>; + +TYPED_TEST_SUITE( EllpackSegmentsTest, EllpackSegmentsTypes ); + +TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes< EllpackSegmentsType >(); +} + +TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >(); +} + + +#endif + +#include "../../main.h" -- GitLab From 8bdceeae02a439a915709675a801b1e60bc6f887 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 6 Dec 2019 23:13:52 +0100 Subject: [PATCH 015/179] Implementing unit tests for Segments. --- .../Containers/Segments/SegmentsTest.hpp | 39 ++++++++++++++++++- .../Segments/SegmentsTest_Ellpack.h | 6 +++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 9aa7fb94f..19fff24fa 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -25,7 +25,7 @@ void test_SetSegmentsSizes_EqualSizes() const IndexType segmentsCount = 20; const IndexType segmentSize = 5; TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); - segmentsSizes = 5;//segmentSize; + segmentsSizes = segmentSize; Segments segments( segmentsSizes ); @@ -92,4 +92,41 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly() EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); } +template< typename Segments > +void test_GetMaxInSegments() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType 
segmentSize = 5; + const IndexType size = segmentsCount * segmentSize; + + Segments segments( segmentsCount, segmentSize ); + TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); + segmentsSizes = segmentSize; + + Segments segments( segmentsSizes ); + + TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( size ); + + for( IndexType i = 0; i < size; i++ ) + v.setElement( i, i ); + + TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); + + const auto v_view = v.getConstView(); + auto result_view = result.getView(); + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> IndexType { + return v_view[ i ]; + } + auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { + a = TNL::max( a, b ); + } + auto keep = [=] __cuda_callable__ ( IndexType& i, const IndexType a ) mutable { + result_view[ i ] = a; + } + segments.allReduction( fetch, reduction, keep, std::numeric_limits< ResultType >::min() ); +} + #endif diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h index d484fd27d..510fa8738 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h @@ -59,6 +59,12 @@ TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly ) test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >(); } +TYPED_TEST( EllpackSegmentsTest, getMaxInSegments ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_GetMaxInSegments< EllpackSegmentsType >(); +} #endif -- GitLab From c8c5fc096930d387d0ce3a8b654a3db19f363d83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 7 Dec 2019 11:58:37 +0100 Subject: [PATCH 016/179] Added test for allReduction in Segments. 
--- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.hpp | 2 +- .../Containers/Segments/SegmentsTest.hpp | 29 +++++++++++-------- .../Containers/Segments/SegmentsTest_CSR.h | 7 +++++ .../Segments/SegmentsTest_Ellpack.h | 4 +-- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 677cd1b00..486149e04 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -201,7 +201,7 @@ void CSR< Device, Index >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index e855d0d9a..034b0820e 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -278,7 +278,7 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } template< typename Device, diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 19fff24fa..484b92eb4 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -93,7 +93,7 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly() } template< typename Segments > -void test_GetMaxInSegments() +void test_AllReduction_MaximumInSegments() { using DeviceType = typename Segments::DeviceType; using IndexType = typename Segments::IndexType; @@ -102,31 +102,36 @@ void test_GetMaxInSegments() const IndexType segmentSize = 5; const IndexType size = segmentsCount * segmentSize; - Segments segments( segmentsCount, segmentSize ); TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); segmentsSizes = segmentSize; Segments segments( segmentsSizes ); - TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( size ); + TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() ); - for( IndexType i = 0; i < size; i++ ) - v.setElement( i, i ); + IndexType k( 1 ); + for( IndexType i = 0; i < segmentsCount; i++ ) + for( IndexType j = 0; j < segmentSize; j++ ) + v.setElement( segments.getGlobalIndex( i, j ), k++ ); TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); const auto v_view = v.getConstView(); auto result_view = result.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType i ) -> IndexType { - return v_view[ i ]; - } + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType { + return v_view[ globalIdx ]; + }; auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { a = TNL::max( a, b ); - } - auto keep = [=] __cuda_callable__ ( IndexType& i, const IndexType a ) mutable { + }; + auto keep = [=] __cuda_callable__ ( const IndexType i, const IndexType a ) mutable { result_view[ i ] = 
a; - } - segments.allReduction( fetch, reduction, keep, std::numeric_limits< ResultType >::min() ); + }; + segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + + std::cerr << result << std::endl; + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); } #endif diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h index e92b7c738..81d4e9ff3 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h @@ -52,6 +52,13 @@ TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes ) test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >(); } +TYPED_TEST( CSRSegmentsTest, allReduction_MaximumInSegments ) +{ + using CSRSegmentsType = typename TestFixture::CSRSegmentsType; + + test_AllReduction_MaximumInSegments< CSRSegmentsType >(); +} + #endif #include "../../main.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h index 510fa8738..7b5e90b23 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h @@ -59,11 +59,11 @@ TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly ) test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >(); } -TYPED_TEST( EllpackSegmentsTest, getMaxInSegments ) +TYPED_TEST( EllpackSegmentsTest, allReduction_MaximumInSegments ) { using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; - test_GetMaxInSegments< EllpackSegmentsType >(); + test_AllReduction_MaximumInSegments< EllpackSegmentsType >(); } #endif -- GitLab From bfb8e0c0a4794f48e95e6eff29f572d0f952228a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 7 Dec 2019 12:10:17 +0100 Subject: [PATCH 017/179] Implementing SlicedEllpack segments. 
--- src/TNL/Containers/Segments/SlicedEllpack.hpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 60d2059fe..c91a13473 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -64,9 +64,22 @@ setSizes( const SizesHolder& sizes ) this->segmentOffsets.setSize( segmentsCount + 1 ); Ellpack< DeviceType, IndexType, true > ellpack; ellpack.setSizes( segmentsCount, SliceSize ); - ... + const IndexType _size = this->getSize(); + const auto sizes_view = sizes.getConstView(); + auto offsets_view = this->segmentOffsets().getView(); + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType { + if( globalIdx < size ) + return sizes_view[ globalIdx ]; + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) { + aux = TNL::max( aux, i ); + }; + auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) { + offsets_view[ i ] = res; + } + std::cerr << offsets_view << std::endl; -- GitLab From 01f367ca5e64c90d8c1db876e16bad37b3ef6e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 20:07:29 +0100 Subject: [PATCH 018/179] Implementing SlicedEllpack segments. --- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.hpp | 5 +- src/TNL/Containers/Segments/SlicedEllpack.h | 14 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 191 ++++++++++++------ 4 files changed, 141 insertions(+), 71 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 486149e04..ef7431038 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -153,7 +153,7 @@ void CSR< Device, Index >:: forSegments( IndexType first, IndexType last, Function& f, Args... 
args ) const { - const auto offsetsView = this->offsets.getView(); + const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 034b0820e..d3d90be5e 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -189,11 +189,10 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { - const auto offsetsView = this->offsets.getView(); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; for( IndexType j = begin; j < end; j++ ) @@ -206,7 +205,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const IndexType storageSize = this->getStorageSize(); const IndexType alignedSize = this->alignedSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = i; const IndexType end = storageSize; for( IndexType j = begin; j < end; j += alignedSize ) diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index a5ef9d121..ecc2c8c7e 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -42,7 +42,13 @@ class SlicedEllpack * \brief Set sizes of particular segments. 
*/ template< typename SizesHolder = OffsetsHolder > - void setSizes( const SizesHolder& sizes ); + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; /** * \brief Number segments. @@ -50,8 +56,6 @@ class SlicedEllpack __cuda_callable__ IndexType getSize() const; - __cuda_callable__ - IndexType getSegmentSize( const IndexType segmentIdx ) const; __cuda_callable__ IndexType getStorageSize() const; @@ -90,9 +94,9 @@ class SlicedEllpack protected: - IndexType size; + IndexType size, alignedSize, segmentsCount; - OffsetHolder sliceOffsets; + OffsetsHolder sliceOffsets, sliceSegmentSizes; }; } // namespace Segements diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index c91a13473..e23ee5f15 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -26,17 +26,30 @@ template< typename Device, int SliceSize > SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: SlicedEllpack() - : size( 0 ) + : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) + : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) +{ + this->setSegmentsSizes( sizes ); +} + template< typename Device, typename Index, bool RowMajorOrder, int SliceSize > SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack& slicedEllpack ) - : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets ) + : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), + segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), + sliceSegmentSizes( 
slicedEllpack.sliceSegmentSizes ) { } @@ -46,7 +59,9 @@ template< typename Device, int SliceSize > SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack&& slicedEllpack ) - : size( slicedEllpack.size ), sliceOffsets( slicedEllpack.sliceOffsets ) + : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), + segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), + sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) { } @@ -57,36 +72,36 @@ template< typename Device, template< typename SizesHolder > void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: -setSizes( const SizesHolder& sizes ) +setSegmentsSizes( const SizesHolder& sizes ) { - this->size = sizes.getSize(); - const IndexType segmentsCount = roundUpDivision( this->size, getSliceSize() ); - this->segmentOffsets.setSize( segmentsCount + 1 ); + this->segmentsCount = sizes.getSize(); + const IndexType slicesCount = roundUpDivision( this->segmentsCount, getSliceSize() ); + this->sliceOffsets.setSize( slicesCount + 1 ); + this->sliceOffsets = 0; + this->sliceSegmentSizes.setSize( slicesCount ); Ellpack< DeviceType, IndexType, true > ellpack; - ellpack.setSizes( segmentsCount, SliceSize ); + ellpack.setSegmentsSizes( slicesCount, SliceSize ); - const IndexType _size = this->getSize(); + const IndexType _size = sizes.getSize(); const auto sizes_view = sizes.getConstView(); - auto offsets_view = this->segmentOffsets().getView(); + auto slices_view = this->sliceOffsets.getView(); + auto slice_segment_size_view = this->sliceSegmentSizes.getView(); auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType { - if( globalIdx < size ) + if( globalIdx < _size ) return sizes_view[ globalIdx ]; + return 0; }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) { aux = TNL::max( aux, i ); }; - auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) { - 
offsets_view[ i ] = res; - } - - std::cerr << offsets_view << std::endl; - - - - if( RowMajorOrder ) - this->alignedSize = this->size; - else - this->alignedSize = roundUpDivision( size, this->getSliceSize() ) * this->getSliceSize(); + auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) mutable { + slices_view[ i ] = res * SliceSize; + slice_segment_size_view[ i ] = res; + }; + ellpack.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + this->sliceOffsets.template scan< Algorithms::ScanType::Exclusive >(); + this->size = sum( sizes ); + this->alignedSize = this->sliceOffsets.getElement( slicesCount ); } template< typename Device, @@ -96,9 +111,9 @@ template< typename Device, __cuda_callable__ Index SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: -getSize() const +getSegmentsCount() const { - return this->size; + return this->segmentsCount; } template< typename Device, @@ -110,7 +125,29 @@ Index SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: getSegmentSize( const IndexType segmentIdx ) const { - return this->segmentSize; + const Index sliceIdx = segmentIdx / SliceSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + return this->sliceSegmentSizes[ sliceIdx ]; + else + { +#ifdef __CUDA_ARCH__ + return this->sliceSegmentSizes[ sliceIdx ]; +#else + return this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +getSize() const +{ + return this->size; } template< typename Device, @@ -122,7 +159,7 @@ Index SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: getStorageSize() const { - return this->alignedSize * this->segmentSize; + return this->alignedSize; } template< typename Device, @@ -134,10 +171,28 @@ Index SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: getGlobalIndex( const Index segmentIdx, 
const Index localIdx ) const { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + IndexType sliceOffset, segmentSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + { + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + } + else + { +#ifdef __CUDA__ARCH__ + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; +#else + sliceOffset = this->sliceOffsets.getElement( sliceIdx ); + segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } if( RowMajorOrder ) - return segmentIdx * this->segmentSize + localIdx; + return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; else - return segmentIdx + this->alignedSize * localIdx; + return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; } template< typename Device, @@ -160,28 +215,32 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { - const auto offsetsView = this->offsets.getView(); + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { - const IndexType segmentSize = this->segmentSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { - const IndexType begin = i * segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; - for( IndexType j = begin; j < end; j++ ) - if( ! f( i, j, args... 
) ) + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } else { - const IndexType storageSize = this->getStorageSize(); - const IndexType alignedSize = this->alignedSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { - const IndexType begin = i; - const IndexType end = storageSize; - for( IndexType j = begin; j < end; j += alignedSize ) - if( ! f( i, j, args... ) ) + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + if( ! f( segmentIdx, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -197,7 +256,7 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... ); } template< typename Device, @@ -209,32 +268,36 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); - const IndexType segmentSize = this->segmentSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = i * segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); - keeper( i, aux ); + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) + reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } else { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); - const IndexType storageSize = this->getStorageSize(); - const IndexType alignedSize = this->alignedSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = i; - const IndexType end = storageSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); - for( IndexType j = begin; j < end; j += alignedSize ) - reduction( aux, fetch( i, j, args... ) ); - keeper( i, aux ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } @@ -249,7 +312,7 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } template< typename Device, @@ -260,9 +323,11 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: save( File& file ) const { - file.save( &segmentSize ); file.save( &size ); file.save( &alignedSize ); + file.save( &segmentsCount ); + this->sliceOffsets.save( file ); + this->sliceSegmentSizes.save( file ); } template< typename Device, @@ -273,9 +338,11 @@ void SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: load( File& file ) { - file.load( &segmentSize ); file.load( &size ); file.load( &alignedSize ); + file.load( &segmentsCount ); + this->sliceOffsets.load( file ); + this->sliceSegmentSizes.load( file ); } } // namespace Segments -- GitLab From 258f4d760ed9c155bbcd750e1792fe1f7f72ea9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 20:07:57 +0100 Subject: [PATCH 019/179] Added SlicedEllpack segments unit tests. --- .../Containers/Segments/CMakeLists.txt | 12 ++-- .../Containers/Segments/SegmentsTest.hpp | 10 ++- .../Segments/SegmentsTest_SlicedEllpack.cpp | 1 + .../Segments/SegmentsTest_SlicedEllpack.cu | 1 + .../Segments/SegmentsTest_SlicedEllpack.h | 64 +++++++++++++++++++ 5 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu create mode 100644 src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt index 6304a4998..742fb69ef 100644 --- a/src/UnitTests/Containers/Segments/CMakeLists.txt +++ b/src/UnitTests/Containers/Segments/CMakeLists.txt @@ -14,8 +14,8 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) -# CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack 
SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) -# TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) # ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp ) @@ -38,9 +38,9 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) -# ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp ) -# TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) -# TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) @@ -48,5 +48,5 @@ ENDIF( BUILD_CUDA ) #ADD_TEST( SegmentsTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) -#ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 484b92eb4..acc75655f 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -40,7 +40,6 @@ void 
test_SetSegmentsSizes_EqualSizes() EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount ); EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize ); EXPECT_LE( segments2.getSize(), segments2.getStorageSize() ); - for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize ); @@ -100,7 +99,6 @@ void test_AllReduction_MaximumInSegments() const IndexType segmentsCount = 20; const IndexType segmentSize = 5; - const IndexType size = segmentsCount * segmentSize; TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); segmentsSizes = segmentSize; @@ -113,6 +111,13 @@ void test_AllReduction_MaximumInSegments() for( IndexType i = 0; i < segmentsCount; i++ ) for( IndexType j = 0; j < segmentSize; j++ ) v.setElement( segments.getGlobalIndex( i, j ), k++ ); + /*auto view = v.getView(); + auto init = [=] __cuda_callable__ ( const IndexType i, const IndexType j ) mutable -> bool { + view[ j ] = j + 1; + return true; + }; + segments.forAll( init ); + std::cerr << v << std::endl;*/ TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); @@ -129,7 +134,6 @@ void test_AllReduction_MaximumInSegments() }; segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); - std::cerr << result << std::endl; for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); } diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp new file mode 100644 index 000000000..cd9865f28 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_SlicedEllpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu new file mode 100644 index 000000000..cd9865f28 --- /dev/null +++ 
b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu @@ -0,0 +1 @@ +#include "SegmentsTest_SlicedEllpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h new file mode 100644 index 000000000..1bcff3191 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h @@ -0,0 +1,64 @@ +/*************************************************************************** + SegmentsTest_SlicedEllpack.h - description + ------------------- + begin : Dec 9, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include + +#include "SegmentsTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Segments > +class SlicedEllpackSegmentsTest : public ::testing::Test +{ +protected: + using SlicedEllpackSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using SlicedEllpackSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, + 
TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, +#endif +>; + +TYPED_TEST_SUITE( SlicedEllpackSegmentsTest, SlicedEllpackSegmentsTypes ); + +TYPED_TEST( SlicedEllpackSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes< SlicedEllpackSegmentsType >(); +} + +TYPED_TEST( SlicedEllpackSegmentsTest, allReduction_MaximumInSegments ) +{ + using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType; + + test_AllReduction_MaximumInSegments< SlicedEllpackSegmentsType >(); +} + +#endif + +#include "../../main.h" -- GitLab From 2ee53835a219ef29baceeb1d8334a436dde27dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 21:10:05 +0100 Subject: [PATCH 020/179] Added segments based SlicedEllpack sparse matrix unit test. --- src/TNL/Containers/Segments/SlicedEllpack.hpp | 8 +- src/UnitTests/Matrices/CMakeLists.txt | 7 ++ .../SparseMatrixTest_SlicedEllpack.cpp | 2 +- .../SparseMatrixTest_SlicedEllpack.cu | 2 +- .../Matrices/SparseMatrixTest_SlicedEllpack.h | 73 +++++++++++-------- 5 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index e23ee5f15..c8e74ec59 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -326,8 +326,8 @@ save( File& file ) const file.save( &size ); file.save( &alignedSize ); file.save( &segmentsCount ); - this->sliceOffsets.save( file ); - this->sliceSegmentSizes.save( file ); + file << this->sliceOffsets; + file << this->sliceSegmentSizes; } template< typename Device, @@ -341,8 +341,8 @@ load( File& file ) file.load( &size ); file.load( &alignedSize ); file.load( &segmentsCount ); - this->sliceOffsets.load( file ); - this->sliceSegmentSizes.load( file ); + file >> this->sliceOffsets; + file >> 
this->sliceSegmentSizes; } } // namespace Segments diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index ef1f04371..9b168bd56 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -34,6 +34,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -81,6 +84,9 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) @@ -101,6 +107,7 @@ ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT # Segments tests ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git 
a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp index 40e2e94b8..a88301100 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp @@ -1 +1 @@ -#include "SparseMatrixTest_SlicedEllpack.h" +#include "SparseMatrixTest_SlicedEllpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu index 40e2e94b8..a88301100 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu @@ -1 +1 @@ -#include "SparseMatrixTest_SlicedEllpack.h" +#include "SparseMatrixTest_SlicedEllpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 0798f59dc..00184754c 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -1,14 +1,16 @@ /*************************************************************************** - SparseMatrixTest_SlicedEllpack.h - description + SparseMatrixTest_SlicedEllpack_segments.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 9, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include +#include +#include + #include "SparseMatrixTest.hpp" #include @@ -24,38 +26,49 @@ protected: using SlicedEllpackMatrixType = Matrix; }; +//// +// Row-major format is used for the host system +template< typename Device, typename Index > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >; + // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, 
RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, 
ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long > #endif >; -TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes ); +TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) { @@ -124,7 +137,7 @@ TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack" ); + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" ); } TYPED_TEST( SlicedEllpackMatrixTest, printTest ) -- GitLab From 40db8e95f91a0c69ca485d45f7565cb33ac1d2ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 22:17:47 +0100 Subject: [PATCH 021/179] Implementing SparseMatrix::getNumberOfNonzeroMatrixElements. 
--- src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 6b6a58f9a..b510636f5 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -17,7 +17,7 @@ namespace TNL { namespace Matrices { template< typename Real, - template< typename, typename > class Segments, + template< typename Device_, typename Index_ > class Segments, typename Device = Devices::Host, typename Index = int, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 37f59c058..1c243bcea 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -10,7 +10,9 @@ #pragma once +#include #include +#include namespace TNL { namespace Matrices { @@ -192,6 +194,12 @@ Index SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: getNumberOfNonzeroMatrixElements() const { + const auto columns_view = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( columns_view[ i ] != paddingIndex ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, -- GitLab From 21bd25d4f536738eee8d28a641c1742525ded11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 22:31:22 +0100 Subject: [PATCH 022/179] Added unit test for SparseMatrix::getNumberOfNonzeroMatrixElements. 
--- src/UnitTests/Matrices/SparseMatrixTest.hpp | 69 +++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index ef5b28d24..5dcd96ebc 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -150,6 +150,75 @@ void test_SetLike() EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) 
+ { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + } + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + template< typename Matrix > void test_Reset() { -- GitLab From 93c68aef6be53f00b523519d9fc7b4e3e921974c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 22:51:45 +0100 Subject: [PATCH 023/179] Added error messages to matrix reader. --- src/TNL/Matrices/MatrixReader_impl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index d00fdb904..70fd06d36 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -55,7 +55,10 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, bool symmetricMatrix( false ); if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) ) + { + std::cerr << "Unable to read MTX file header." << std::endl; return false; + } if( symReader && !symmetricMatrix ) { @@ -67,12 +70,18 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, rowLengths.setSize( rows ); if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) ) + { + std::cerr << "Unable to compute compressed row lengths." << std::endl; return false; + } matrix.setCompressedRowLengths( rowLengths ); if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) ) + { + std::cerr << "Unable to read matrix elements from MTX file," << std::endl; return false; + } return true; } @@ -84,7 +93,10 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, bool symmetricMatrix( false ); IndexType rows, columns; if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) ) + { + std::cerr << "Unable to read MTX file header." 
<< std::endl; return false; + } file.clear(); file.seekg( 0, std::ios::beg ); String line; @@ -103,7 +115,10 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, IndexType row( 1 ), column( 1 ); RealType value; if( ! parseMtxLineWithElement( line, row, column, value ) ) + { + std::cerr << "Unable to parse MTX file line." << std::endl; return false; + } if( value != matrix.getElement( row-1, column-1 ) || ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) ) { -- GitLab From 51884a04fead8bd5e5f81f421cc4346a85a029c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 9 Dec 2019 22:52:14 +0100 Subject: [PATCH 024/179] Debuging SpMV benchmark. --- src/Benchmarks/SpMV/spmv.h | 86 +++++++++++++----------- src/Benchmarks/SpMV/tnl-benchmark-spmv.h | 10 +-- 2 files changed, 53 insertions(+), 43 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 6bfee5ffe..7e3928e09 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -25,6 +25,11 @@ #include #include + +#include +#include +#include +#include using namespace TNL::Matrices; #include "cusparseCSRMatrix.h" @@ -36,6 +41,22 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; +// Segments based sparse matrix aliases +template< typename Real, typename Device, typename Index > +using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >; + +template< typename Device, typename Index > +using EllpackSegments = Containers::Segments::Ellpack< Device, Index >; + +template< typename Real, typename Device, typename Index > +using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >; + +template< typename Device, typename Index > +using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index >; + +template< typename Real, 
typename Device, typename Index > +using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >; + // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) { @@ -85,7 +106,7 @@ void printMatrixInfo( const Matrix& matrix, template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > -bool +void benchmarkSpMV( Benchmark& benchmark, const String& inputFileName, bool verboseMR ) @@ -98,19 +119,10 @@ benchmarkSpMV( Benchmark& benchmark, CSR_DeviceMatrix CSRdeviceMatrix; // Read the matrix for CSR, to set up cuSPARSE - try - { - if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - return false; - } - } - catch( std::bad_alloc& e ) - { - e.what(); - return false; - } + if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) ) + { + throw std::bad_alloc(); + } #ifdef HAVE_CUDA // cuSPARSE handle setup @@ -140,19 +152,10 @@ benchmarkSpMV( Benchmark& benchmark, CudaVector deviceVector, deviceVector2; // Load the format - try - { - if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - return false; - } - } - catch( std::bad_alloc& e ) - { - e.what(); - return false; - } + if( ! 
MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) ) + { + throw std::bad_alloc(); + } // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), @@ -244,7 +247,7 @@ benchmarkSpMV( Benchmark& benchmark, resultcuSPARSEDeviceVector2 = deviceVector2; - // Difference between GPU (curent format) and GPU-cuSPARSE results + // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); @@ -279,26 +282,33 @@ benchmarkSpMV( Benchmark& benchmark, #endif std::cout << std::endl; - return true; } template< typename Real = double, typename Index = int > -bool +void benchmarkSpmvSynthetic( Benchmark& benchmark, const String& inputFileName, bool verboseMR ) { - bool result = true; - result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + + //// + // Segments based sparse matrices + std::cerr << "*********************************" << std::endl; + benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); + std::cerr << "*********************************" << 
std::endl; + benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); + std::cerr << "*********************************" << std::endl; + benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); + std::cerr << "*********************************" << std::endl; // AdEllpack is broken -// result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); - return result; + // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); } } // namespace Benchmarks diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index 77c079c4c..65416f043 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -96,11 +96,11 @@ main( int argc, char* argv[] ) // * The guide on what parameters to use prints twice. // FIXME: When ./tnl-benchmark-spmv-dbg is called with '--help': // * The guide on what parameter to use print once. - // But then it CRASHES due to segfault: -// The program attempts to get unknown parameter openmp-enabled -// Aborting the program. -// terminate called after throwing an instance of 'int' -// [1] 17156 abort (core dumped) ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help + // But then it CRASHES due to segfault: + // The program attempts to get unknown parameter openmp-enabled + // Aborting the program. + // terminate called after throwing an instance of 'int' + // [1] 17156 abort (core dumped) ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help if( ! 
parseCommandLine( argc, argv, conf_desc, parameters ) ) { conf_desc.printUsage( argv[ 0 ] ); -- GitLab From 0b14ec6443093b8a063ded26365ed853961a6ffb Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 10 Dec 2019 15:31:44 +0100 Subject: [PATCH 025/179] Replacing error messages in MatrixReader with exceptions. --- src/Benchmarks/SpMV/spmv.h | 10 +- src/TNL/Matrices/MatrixReader.h | 20 ++-- src/TNL/Matrices/MatrixReader_impl.h | 168 +++++++++------------------ 3 files changed, 67 insertions(+), 131 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 7e3928e09..a6acb52fd 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -119,10 +119,7 @@ benchmarkSpMV( Benchmark& benchmark, CSR_DeviceMatrix CSRdeviceMatrix; // Read the matrix for CSR, to set up cuSPARSE - if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - } + MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ); #ifdef HAVE_CUDA // cuSPARSE handle setup @@ -152,10 +149,7 @@ benchmarkSpMV( Benchmark& benchmark, CudaVector deviceVector, deviceVector2; // Load the format - if( ! 
MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - } + MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h index aaf75a373..2c3cbb424 100644 --- a/src/TNL/Matrices/MatrixReader.h +++ b/src/TNL/Matrices/MatrixReader.h @@ -15,7 +15,7 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class MatrixReaderDeviceDependentCode @@ -30,24 +30,24 @@ class MatrixReader typedef typename Matrix::DeviceType DeviceType; typedef typename Matrix::RealType RealType; - static bool readMtxFile( const String& fileName, + static void readMtxFile( const String& fileName, Matrix& matrix, bool verbose = false, bool symReader = false ); - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose = false, bool symReader = false ); - static bool readMtxFileHostMatrix( std::istream& file, + static void readMtxFileHostMatrix( std::istream& file, Matrix& matrix, typename Matrix::CompressedRowLengthsVector& rowLengths, bool verbose, bool symReader ); - static bool verifyMtxFile( std::istream& file, + static void verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose = false ); @@ -58,16 +58,16 @@ class MatrixReader IndexType& lineNumber ); protected: - static bool checkMtxHeader( const String& header, + static void checkMtxHeader( const String& header, bool& symmetric ); - static bool readMtxHeader( std::istream& file, + static void readMtxHeader( std::istream& file, IndexType& rows, IndexType& columns, bool& symmetricMatrix, bool verbose ); - static bool computeCompressedRowLengthsFromMtxFile( std::istream& file, + static void computeCompressedRowLengthsFromMtxFile( std::istream& file, Containers::Vector< int, DeviceType, 
int >& rowLengths, const int columns, const int rows, @@ -75,13 +75,13 @@ class MatrixReader bool verbose, bool symReader = false ); - static bool readMatrixElementsFromMtxFile( std::istream& file, + static void readMatrixElementsFromMtxFile( std::istream& file, Matrix& matrix, bool symmetricMatrix, bool verbose, bool symReader ); - static bool parseMtxLineWithElement( const String& line, + static void parseMtxLineWithElement( const String& line, IndexType& row, IndexType& column, RealType& value ); diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index 70fd06d36..25643d8c7 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -11,6 +11,7 @@ #pragma once #include +#include #include #include #include @@ -20,7 +21,7 @@ namespace TNL { namespace Matrices { template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFile( const String& fileName, +void MatrixReader< Matrix >::readMtxFile( const String& fileName, Matrix& matrix, bool verbose, bool symReader ) @@ -28,24 +29,21 @@ bool MatrixReader< Matrix >::readMtxFile( const String& fileName, std::fstream file; file.open( fileName.getString(), std::ios::in ); if( ! file ) - { - std::cerr << "I am not able to open the file " << fileName << "." 
<< std::endl; - return false; - } - return readMtxFile( file, matrix, verbose, symReader ); + throw std::runtime_error( std::string( "I am not able to open the file " ) + fileName.getString() ); + readMtxFile( file, matrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFile( std::istream& file, +void MatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) { - return MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader ); + MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, +void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, Matrix& matrix, typename Matrix::CompressedRowLengthsVector& rowLengths, bool verbose, @@ -54,17 +52,10 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, IndexType rows, columns; bool symmetricMatrix( false ); - if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) ) - { - std::cerr << "Unable to read MTX file header." << std::endl; - return false; - } + readMtxHeader( file, rows, columns, symmetricMatrix, verbose ); if( symReader && !symmetricMatrix ) - { - std::cout << "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." << std::endl; - return false; - } + throw std::runtime_error( "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." ); matrix.setDimensions( rows, columns ); rowLengths.setSize( rows ); @@ -75,28 +66,21 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, return false; } + computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ); + matrix.setCompressedRowLengths( rowLengths ); - if( ! 
readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) ) - { - std::cerr << "Unable to read matrix elements from MTX file," << std::endl; - return false; - } - return true; + readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, +void MatrixReader< Matrix >::verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose ) { bool symmetricMatrix( false ); IndexType rows, columns; - if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) ) - { - std::cerr << "Unable to read MTX file header." << std::endl; - return false; - } + readMtxHeader( file, rows, columns, symmetricMatrix, false ); file.clear(); file.seekg( 0, std::ios::beg ); String line; @@ -114,19 +98,16 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, } IndexType row( 1 ), column( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, row, column, value ) ) - { - std::cerr << "Unable to parse MTX file line." << std::endl; - return false; - } + parseMtxLineWithElement( line, row, column, value ); if( value != matrix.getElement( row-1, column-1 ) || ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) ) { - std::cerr << "*** !!! VERIFICATION ERROR !!! *** " << std::endl - << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl - << "The matrix value is " << matrix.getElement( row-1, column-1 ) - << " while the file value is " << value << "." << std::endl; - return false; + std::stringstream str; + str << "*** !!! VERIFICATION ERROR !!! *** " << std::endl + << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl + << "The matrix value is " << matrix.getElement( row-1, column-1 ) + << " while the file value is " << value << "." 
<< std::endl; + throw std::runtime_error( str.str() ); } processedElements++; if( symmetricMatrix && row != column ) @@ -141,7 +122,6 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, std::cout << " Verifying the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements() << " -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl; - return true; } template< typename Matrix > @@ -167,8 +147,7 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file, } IndexType currentRow( 1 ), currentColumn( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, currentRow, currentColumn, value ) ) - return false; + parseMtxLineWithElement( line, currentRow, currentColumn, value ); if( ( currentRow == row + 1 && currentColumn == column + 1 ) || ( symmetricMatrix && currentRow == column + 1 && currentColumn == row + 1 ) ) return true; @@ -177,45 +156,30 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file, } template< typename Matrix > -bool MatrixReader< Matrix >::checkMtxHeader( const String& header, +void MatrixReader< Matrix >::checkMtxHeader( const String& header, bool& symmetric ) { std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty ); - if( (int) parsedLine.size() < 5 ) - return false; - if( parsedLine[ 0 ] != "%%MatrixMarket" ) - return false; + if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" ) + throw std::runtime_error( "Wrong MTX file header. We expect line like this: %%MatrixMarket matrix coordinate real general" ); if( parsedLine[ 1 ] != "matrix" ) - { - std::cerr << "Error: 'matrix' expected in the header line (" << header << ")." 
<< std::endl; - return false; - } + throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() ); if( parsedLine[ 2 ] != "coordinates" && parsedLine[ 2 ] != "coordinate" ) - { - std::cerr << "Error: Only 'coordinates' format is supported now, not " << parsedLine[ 2 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Error: Only 'coordinates' format is supported now, not " ) + parsedLine[ 2 ].getString() ); if( parsedLine[ 3 ] != "real" ) - { - std::cerr << "Error: Only 'real' matrices are supported, not " << parsedLine[ 3 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Only 'real' matrices are supported, not " ) + parsedLine[ 3 ].getString() ); if( parsedLine[ 4 ] != "general" ) { if( parsedLine[ 4 ] == "symmetric" ) symmetric = true; else - { - std::cerr << "Error: Only 'general' matrices are supported, not " << parsedLine[ 4 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Only 'general' matrices are supported, not " ) + parsedLine[ 4 ].getString() ); } - return true; } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, +void MatrixReader< Matrix >::readMtxHeader( std::istream& file, IndexType& rows, IndexType& columns, bool& symmetric, @@ -231,27 +195,18 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, std::getline( file, line ); if( ! headerParsed ) { - headerParsed = checkMtxHeader( line, symmetric ); - if( ! headerParsed ) - return false; + checkMtxHeader( line, symmetric ); if( verbose && symmetric ) std::cout << "The matrix is SYMMETRIC ... "; continue; } if( line[ 0 ] == '%' ) continue; if( ! headerParsed ) - { - std::cerr << "Unknown format of the file. 
We expect line like this:" << std::endl; - std::cerr << "%%MatrixMarket matrix coordinate real general" << std::endl; - return false; - } + throw std::runtime_error( "Unknown format of the file. We expect line like this: %%MatrixMarket matrix coordinate real general" ); parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) - { - std::cerr << "Wrong number of parameters in the matrix header." << std::endl; - return false; - } + throw std::runtime_error( "Wrong number of parameters in the matrix header - should be 3." ); rows = atoi( parsedLine[ 0 ].getString() ); columns = atoi( parsedLine[ 1 ].getString() ); if( verbose ) @@ -259,16 +214,12 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, << " rows and " << columns << " columns. " << std::endl; if( rows <= 0 || columns <= 0 ) - { - std::cerr << "Wrong parameters in the matrix header." << std::endl; - return false; - } - return true; + throw std::runtime_error( "Row or column index is negative." ); } } template< typename Matrix > -bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file, +void MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file, Containers::Vector< int, DeviceType, int >& rowLengths, const int columns, const int rows, @@ -294,13 +245,13 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea } IndexType row( 1 ), column( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, row, column, value ) ) - return false; + parseMtxLineWithElement( line, row, column, value ); numberOfElements++; if( column > columns || row > rows ) { - std::cerr << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "." 
<< std::endl; - return false; + std::stringstream str; + str << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "."; + throw std::runtime_error( str.str() ); } if( verbose ) std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands \r" << std::flush; @@ -313,23 +264,23 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea if( rowLengths[ row - 1 ] > columns ) { - std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "." << std::endl; - return false; + std::stringstream str; + str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "."; + throw std::runtime_error( str.str() ); } if( symmetricMatrix && row != column && symReader ) { rowLengths[ column - 1 ]++; if( rowLengths[ column - 1 ] > columns ) { - std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ." << std::endl; - return false; + std::stringstream str; + str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ."; + throw std::runtime_error( str.str() ); } continue; } else if( symmetricMatrix && row != column && !symReader ) - { rowLengths[ column - 1 ]++; - } } file.clear(); long int fileSize = file.tellg(); @@ -338,11 +289,10 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." 
<< std::endl; - return true; } template< typename Matrix > -bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, +void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, Matrix& matrix, bool symmetricMatrix, bool verbose, @@ -366,8 +316,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, } IndexType row( 1 ), column( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, row, column, value ) ) - return false; + parseMtxLineWithElement( line, row, column, value ); if( !symReader || ( symReader && row >= column ) ) @@ -377,9 +326,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, processedElements++; if( symmetricMatrix && row != column && symReader ) - { continue; - } else if( symmetricMatrix && row != column && !symReader ) { matrix.setElement( column - 1, row - 1, value ); @@ -394,12 +341,10 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements() << " -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." 
<< std::endl; - - return true; } template< typename Matrix > -bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, +void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, IndexType& row, IndexType& column, RealType& value ) @@ -407,13 +352,13 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, std::vector< String > parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) { - std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl; - return false; + std::stringstream str; + str << "Wrong number of parameters in the matrix row at line:" << line; + throw std::runtime_error( str.str() ); } row = atoi( parsedLine[ 0 ].getString() ); column = atoi( parsedLine[ 1 ].getString() ); value = ( RealType ) atof( parsedLine[ 2 ].getString() ); - return true; } template<> @@ -422,13 +367,13 @@ class MatrixReaderDeviceDependentCode< Devices::Host > public: template< typename Matrix > - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) { typename Matrix::CompressedRowLengthsVector rowLengths; - return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); + MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); } }; @@ -438,7 +383,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > public: template< typename Matrix > - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) @@ -448,10 +393,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > HostMatrixType hostMatrix; CompressedRowLengthsVector rowLengths; - return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); - - matrix = hostMatrix; - return true; + MatrixReader< Matrix 
>::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); } }; -- GitLab From 369ae3ce33bca8f6446cf73fa33bff7dcbcbb571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 10 Dec 2019 22:25:29 +0100 Subject: [PATCH 026/179] Fixing MatrixReader. --- src/TNL/Matrices/MatrixReader.h | 2 +- src/TNL/Matrices/MatrixReader_impl.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h index 2c3cbb424..ae0606678 100644 --- a/src/TNL/Matrices/MatrixReader.h +++ b/src/TNL/Matrices/MatrixReader.h @@ -58,7 +58,7 @@ class MatrixReader IndexType& lineNumber ); protected: - static void checkMtxHeader( const String& header, + static bool checkMtxHeader( const String& header, bool& symmetric ); static void readMtxHeader( std::istream& file, diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index 25643d8c7..476a7327e 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -156,12 +156,12 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file, } template< typename Matrix > -void MatrixReader< Matrix >::checkMtxHeader( const String& header, +bool MatrixReader< Matrix >::checkMtxHeader( const String& header, bool& symmetric ) { std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" ) - throw std::runtime_error( "Wrong MTX file header. 
We expect line like this: %%MatrixMarket matrix coordinate real general" ); + return false; if( parsedLine[ 1 ] != "matrix" ) throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() ); if( parsedLine[ 2 ] != "coordinates" && @@ -176,6 +176,7 @@ void MatrixReader< Matrix >::checkMtxHeader( const String& header, else throw std::runtime_error( std::string( "Only 'general' matrices are supported, not " ) + parsedLine[ 4 ].getString() ); } + return true; } template< typename Matrix > @@ -195,7 +196,7 @@ void MatrixReader< Matrix >::readMtxHeader( std::istream& file, std::getline( file, line ); if( ! headerParsed ) { - checkMtxHeader( line, symmetric ); + headerParsed = checkMtxHeader( line, symmetric ); if( verbose && symmetric ) std::cout << "The matrix is SYMMETRIC ... "; continue; @@ -215,6 +216,7 @@ void MatrixReader< Matrix >::readMtxHeader( std::istream& file, if( rows <= 0 || columns <= 0 ) throw std::runtime_error( "Row or column index is negative." ); + break; } } -- GitLab From a00c862ce14608a0c34078ea1711d19414fc2872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 10 Dec 2019 22:26:37 +0100 Subject: [PATCH 027/179] Implementing SparseMatrix::rowsReduction and SparseMatrix::allRowsReduction. 
--- src/TNL/Matrices/SparseMatrix.h | 17 ++- src/TNL/Matrices/SparseMatrix.hpp | 51 ++++++- src/UnitTests/Matrices/SparseMatrixTest.hpp | 135 +++++++++++++++--- .../Matrices/SparseMatrixTest_CSR_segments.h | 7 + .../SparseMatrixTest_Ellpack_segments.h | 7 + .../SparseMatrixTest_SlicedEllpack_segments.h | 7 + 6 files changed, 203 insertions(+), 21 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index b510636f5..268fba6d3 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -34,10 +34,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using IndexType = Index; using RealAllocatorType = RealAllocator; using IndexAllocatorType = IndexAllocator; - using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; - using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType >; + using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); @@ -158,6 +165,12 @@ class SparseMatrix : public Matrix< 
Real, Device, Index, RealAllocator > const RealType& matrixMultiplicator = 1.0 ); */ + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 1c243bcea..e26693f6a 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -517,6 +517,44 @@ vectorProduct( const InVector& inVector, this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + IndexType columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + 
+template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + + /*template< typename Real, template< typename, typename > class Segments, typename Device, @@ -576,7 +614,11 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix& matrix ) { - + Matrix< Real, Device, Index >::operator=( matrix ); + this->columnIndexes = matrix.columnIndexes; + this->segments = matrix.segments; + this->indexAlloctor = matrix.indexAllocator; + this->realAllocator = matrix.realAllocator; } // cross-device copy assignment @@ -596,7 +638,12 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { - + if( std::is_same< Device, Device2 >::value ) + { + + } + + } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 5dcd96ebc..b366f4e2f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -908,18 +908,18 @@ void test_VectorProduct() EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); -/* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 
19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ const IndexType m_rows_5 = 8; const IndexType m_cols_5 = 8; @@ -970,20 +970,18 @@ void test_VectorProduct() for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows m_5.setElement( i, 7, 1); - + VectorType inVector_5; inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) inVector_5.setElement( i, 2 ); VectorType outVector_5; outVector_5.setSize( m_rows_5 ); for( IndexType j = 0; j < outVector_5.getSize(); j++ ) outVector_5.setElement( j, 0 ); - - + m_5.vectorProduct( inVector_5, outVector_5 ); - EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); @@ -995,6 +993,109 @@ void test_VectorProduct() EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); } +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities( rows ); + //rowLengths.setSize( rows ); + rowsCapacities.setElement(0, 6); + rowsCapacities.setElement(1, 3); + rowsCapacities.setElement(2, 4); + rowsCapacities.setElement(3, 
5); + rowsCapacities.setElement(4, 2); + rowsCapacities.setElement(5, 7); + rowsCapacities.setElement(6, 8); + rowsCapacities.setElement(7, 8); + m.setCompressedRowLengths( rowsCapacities ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 5, value++ ); + + m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 2, 4, value++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + m.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. 
+ typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + template< typename Matrix > void test_PerformSORIteration() { diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index bf4e452fa..0718e3a69 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -122,6 +122,13 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest ) test_VectorProduct< CSRMatrixType >(); } +TYPED_TEST( CSRMatrixTest, rowsReduction ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_RowsReduction< CSRMatrixType >(); +} + TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; diff --git 
a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index edfe0bc28..2c0514c0a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -133,6 +133,13 @@ TYPED_TEST( EllpackMatrixTest, vectorProductTest ) test_VectorProduct< EllpackMatrixType >(); } +TYPED_TEST( EllpackMatrixTest, rowsReduction ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_RowsReduction< EllpackMatrixType >(); +} + TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 8d17b8be7..5efcb1eae 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -133,6 +133,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) test_VectorProduct< SlicedEllpackMatrixType >(); } +TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_RowsReduction< SlicedEllpackMatrixType >(); +} + TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; -- GitLab From 8573f541dd5dd163e2f47b824787fe9464764c27 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 11 Dec 2019 18:00:42 +0100 Subject: [PATCH 028/179] Implementing sparse matrcies assignment. 
--- src/TNL/Containers/Segments/CSR.hpp | 11 +- src/TNL/Containers/Segments/Ellpack.hpp | 18 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 6 +- src/TNL/Matrices/Matrix.h | 9 +- src/TNL/Matrices/SparseMatrix.h | 19 +- src/TNL/Matrices/SparseMatrix.hpp | 88 +++- src/UnitTests/Matrices/SparseMatrixTest.hpp | 494 +++++++++--------- 7 files changed, 374 insertions(+), 271 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ef7431038..ccb483125 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -154,11 +154,12 @@ CSR< Device, Index >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getConstView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = offsetsView[ i ]; - const IndexType end = offsetsView[ i + 1 ]; - for( IndexType j = begin; j < end; j++ ) - if( ! f( i, j, args... ) ) + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = offsetsView[ segmentIdx ]; + const IndexType end = offsetsView[ segmentIdx + 1 ]; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index d3d90be5e..337009e99 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -192,11 +192,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { - const IndexType begin = i * segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx * segmentSize; const IndexType end = begin + segmentSize; - for( IndexType j = begin; j < end; j++ ) - if( ! f( i, j, args... ) ) + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -205,11 +206,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const IndexType storageSize = this->getStorageSize(); const IndexType alignedSize = this->alignedSize; - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = i; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx; const IndexType end = storageSize; - for( IndexType j = begin; j < end; j += alignedSize ) - if( ! f( i, j, args... ) ) + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index c8e74ec59..d721edb00 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -225,8 +225,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) - if( ! 
f( segmentIdx, globalIdx, args... ) ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -239,8 +240,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - if( ! f( segmentIdx, globalIdx, args... ) ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index a877fd5c2..4a038eb2e 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -39,7 +39,7 @@ public: using RealAllocatorType = RealAllocator; Matrix( const RealAllocatorType& allocator = RealAllocatorType() ); - + Matrix( const IndexType rows, const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); @@ -100,9 +100,9 @@ public: virtual Real getElement( const IndexType row, const IndexType column ) const = 0; - + const ValuesVector& getValues() const; - + ValuesVector& getValues(); // TODO: parallelize and optimize for sparse matrices @@ -137,7 +137,8 @@ public: __cuda_callable__ Index getValuesSize() const; - protected: + // TODO: restore this + //protected: IndexType rows, columns, numberOfColors; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 268fba6d3..b6a618e10 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -39,7 +39,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesVectorType = typename Matrix< Real, Device, 
Index, RealAllocator >::ValuesVector; using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; - + // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; @@ -64,6 +64,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ @@ -167,10 +170,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ) const; + template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, @@ -201,7 +210,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getPaddingIndex() const; - protected: + +// TODO: restore it and also in Matrix +// protected: ColumnsVectorType columnIndexes; @@ -210,6 +221,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > IndexAllocator indexAlloctor; RealAllocator realAllocator; + + }; } // namespace Conatiners diff --git 
a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index e26693f6a..3605daaef 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -116,6 +116,32 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) this->columnIndexes = this->getPaddingIndex(); } +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + template< typename Real, template< typename, typename > class Segments, typename Device, @@ -554,6 +580,43 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = 
this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + IndexType columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + +} + +template< typename Real, + template< typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} /*template< typename Real, template< typename, typename > class Segments, @@ -638,12 +701,31 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { + using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >; if( std::is_same< Device, Device2 >::value ) { - + RowsCapacitiesType rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setCompressedRowLengths( rowLengths ); + // TODO: Replace this with SparseMatrixView + const auto matrix_columns_view = matrix.columnIndexes.getConstView(); + const auto matrix_values_view = matrix.values.getConstView(); + const auto segments_view = this->segments.getConstView(); + auto this_columns_view = this->columnIndexes.getView(); + auto this_values_view = this->values.getView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) { + const 
IndexType column = matrix_columns_view[ globalIdx ]; + if( column != paddingIndex ) + { + const RealType value = matrix_values_view[ globalIdx ]; + IndexType thisGlobalIdx = segments_view.getGlobalIdx( rowIdx, localIdx ); + this_columns_view[ thisGlobalIdx ] = column; + this_values_view[ thisGlobalIdx ] = value; + } + }; + matrix.forAllRows( f ); } - - } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index b366f4e2f..07a60178f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -18,7 +18,7 @@ #include #include -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include template< typename MatrixHostFloat, typename MatrixHostInt > @@ -36,7 +36,7 @@ void cuda_test_GetType() bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename Matrix > @@ -45,13 +45,13 @@ void test_SetDimensions() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 9; const IndexType cols = 8; - + Matrix m; m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); } @@ -62,41 +62,41 @@ void test_SetCompressedRowLengths() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 10; const IndexType cols = 11; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); rowLengths.setValue( 3 ); - + IndexType rowLength = 1; for( IndexType i = 2; i < rows; i++ ) rowLengths.setElement( i, rowLength++ ); - + 
m.setCompressedRowLengths( rowLengths ); - + // Insert values into the rows. RealType value = 1; - + for( IndexType i = 0; i < 3; i++ ) // 0th row m.setElement( 0, i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) // 1st row m.setElement( 1, i, value++ ); - + for( IndexType i = 0; i < 1; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + for( IndexType i = 0; i < 2; i++ ) // 3rd row m.setElement( 3, i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) // 4th row m.setElement( 4, i, value++ ); - + for( IndexType i = 0; i < 4; i++ ) // 5th row m.setElement( 5, i, value++ ); @@ -111,8 +111,8 @@ void test_SetCompressedRowLengths() for( IndexType i = 0; i < 8; i++ ) // 9th row m.setElement( 9, i, value++ ); - - + + EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 ); EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 ); EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 ); @@ -131,21 +131,21 @@ void test_SetLike() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix1 m1; m1.reset(); m1.setDimensions( rows + 1, cols + 2 ); - + Matrix2 m2; m2.reset(); m2.setDimensions( rows, cols ); - + m1.setLike( m2 ); - - + + EXPECT_EQ( m1.getRows(), m2.getRows() ); EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } @@ -174,7 +174,7 @@ void test_GetNumberOfNonzeroMatrixElements() const IndexType rows = 10; const IndexType cols = 10; - + Matrix m; m.reset(); @@ -225,7 +225,7 @@ void test_Reset() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 sparse matrix: * @@ -235,16 +235,16 @@ void test_Reset() * | 0 0 0 0 | * \ 0 0 0 0 / */ - + const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.setDimensions( rows, cols ); - + m.reset(); - - + + EXPECT_EQ( m.getRows(), 0 ); EXPECT_EQ( m.getColumns(), 0 ); } @@ -255,7 +255,7 @@ void 
test_SetElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 10x10 sparse matrix: * @@ -270,15 +270,15 @@ void test_SetElement() * | 22 23 24 25 26 27 28 29 30 31 | * \ 32 33 34 35 36 37 38 39 40 41 / */ - + const IndexType rows = 10; const IndexType cols = 10; - + Matrix m; m.reset(); - + m.setDimensions( rows, cols ); - + typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); rowLengths.setElement( 0, 4 ); @@ -292,29 +292,29 @@ void test_SetElement() rowLengths.setElement( 8, 10 ); rowLengths.setElement( 9, 10 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < 4; i++ ) m.setElement( 0, 2 * i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) m.setElement( 1, i, value++ ); - + for( IndexType i = 0; i < 8; i++ ) m.setElement( 2, i, value++ ); - + for( IndexType i = 0; i < 2; i++ ) m.setElement( 3, i, value++ ); - + for( IndexType i = 4; i < 8; i++ ) m.setElement( i, 0, value++ ); - + for( IndexType j = 8; j < rows; j++) { for( IndexType i = 0; i < cols; i++ ) m.setElement( j, i, value++ ); } - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 0 ); EXPECT_EQ( m.getElement( 0, 2 ), 2 ); @@ -325,7 +325,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 0, 7 ), 0 ); EXPECT_EQ( m.getElement( 0, 8 ), 0 ); EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); EXPECT_EQ( m.getElement( 1, 1 ), 6 ); EXPECT_EQ( m.getElement( 1, 2 ), 7 ); @@ -336,7 +336,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 1, 7 ), 0 ); EXPECT_EQ( m.getElement( 1, 8 ), 0 ); EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); EXPECT_EQ( m.getElement( 2, 1 ), 9 ); EXPECT_EQ( m.getElement( 2, 2 ), 10 ); @@ -347,7 +347,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 2, 7 ), 15 ); EXPECT_EQ( m.getElement( 2, 8 ), 0 ); 
EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 0 ); @@ -358,7 +358,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 3, 7 ), 0 ); EXPECT_EQ( m.getElement( 3, 8 ), 0 ); EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); EXPECT_EQ( m.getElement( 4, 1 ), 0 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); @@ -369,7 +369,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 4, 7 ), 0 ); EXPECT_EQ( m.getElement( 4, 8 ), 0 ); EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); @@ -380,7 +380,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 5, 7 ), 0 ); EXPECT_EQ( m.getElement( 5, 8 ), 0 ); EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); EXPECT_EQ( m.getElement( 6, 1 ), 0 ); EXPECT_EQ( m.getElement( 6, 2 ), 0 ); @@ -391,7 +391,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 6, 7 ), 0 ); EXPECT_EQ( m.getElement( 6, 8 ), 0 ); EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); EXPECT_EQ( m.getElement( 7, 1 ), 0 ); EXPECT_EQ( m.getElement( 7, 2 ), 0 ); @@ -402,7 +402,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 7, 7 ), 0 ); EXPECT_EQ( m.getElement( 7, 8 ), 0 ); EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); EXPECT_EQ( m.getElement( 8, 1 ), 23 ); EXPECT_EQ( m.getElement( 8, 2 ), 24 ); @@ -413,7 +413,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 8, 7 ), 29 ); EXPECT_EQ( m.getElement( 8, 8 ), 30 ); EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); EXPECT_EQ( m.getElement( 9, 1 ), 33 ); EXPECT_EQ( m.getElement( 9, 2 ), 34 ); @@ -432,7 +432,7 @@ void test_AddElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename 
Matrix::IndexType; - + /* * Sets up the following 6x5 sparse matrix: * @@ -443,10 +443,10 @@ void test_AddElement() * | 0 11 0 0 0 | * \ 0 0 0 12 0 / */ - + const IndexType rows = 6; const IndexType cols = 5; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); @@ -454,61 +454,61 @@ void test_AddElement() rowLengths.setSize( rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < cols - 2; i++ ) // 0th row m.setElement( 0, i, value++ ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row m.setElement( 1, i, value++ ); - + for( IndexType i = 2; i < cols; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + m.setElement( 3, 0, value++ ); // 3rd row - + m.setElement( 4, 1, value++ ); // 4th row - + m.setElement( 5, 3, value++ ); // 5th row - - + + // Check the set elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 4 ); EXPECT_EQ( m.getElement( 1, 2 ), 5 ); EXPECT_EQ( m.getElement( 1, 3 ), 6 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 7 ); EXPECT_EQ( m.getElement( 2, 3 ), 8 ); EXPECT_EQ( m.getElement( 2, 4 ), 9 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 11 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); EXPECT_EQ( m.getElement( 5, 3 ), 12 ); EXPECT_EQ( 
m.getElement( 5, 4 ), 0 ); - + // Add new elements to the old elements with a multiplying factor applied to the old elements. /* @@ -521,7 +521,7 @@ void test_AddElement() * | 0 11 0 0 0 | * \ 0 0 0 12 0 / */ - + /* * The following setup results in the following 6x5 sparse matrix: * @@ -532,57 +532,57 @@ void test_AddElement() * | 0 35 14 15 0 | * \ 0 0 16 41 18 / */ - + RealType newValue = 1; for( IndexType i = 0; i < cols - 2; i++ ) // 0th row m.addElement( 0, i, newValue++, 2.0 ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row m.addElement( 1, i, newValue++, 2.0 ); - + for( IndexType i = 2; i < cols; i++ ) // 2nd row m.addElement( 2, i, newValue++, 2.0 ); - + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row m.addElement( 3, i, newValue++, 2.0 ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row m.addElement( 4, i, newValue++, 2.0 ); - + for( IndexType i = 2; i < cols; i++ ) // 5th row m.addElement( 5, i, newValue++, 2.0 ); - - + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); EXPECT_EQ( m.getElement( 0, 2 ), 9 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 12 ); EXPECT_EQ( m.getElement( 1, 2 ), 15 ); EXPECT_EQ( m.getElement( 1, 3 ), 18 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 21 ); EXPECT_EQ( m.getElement( 2, 3 ), 24 ); EXPECT_EQ( m.getElement( 2, 4 ), 27 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); EXPECT_EQ( m.getElement( 3, 1 ), 11 ); EXPECT_EQ( m.getElement( 3, 2 ), 12 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 35 ); EXPECT_EQ( m.getElement( 4, 2 ), 14 ); EXPECT_EQ( m.getElement( 4, 3 ), 15 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( 
m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 16 ); @@ -596,7 +596,7 @@ void test_SetRow() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 3x7 sparse matrix: * @@ -604,10 +604,10 @@ void test_SetRow() * | 2 2 2 0 0 0 0 | * \ 3 3 3 0 0 0 0 / */ - + const IndexType rows = 3; const IndexType cols = 7; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); @@ -616,7 +616,7 @@ void test_SetRow() rowLengths.setValue( 6 ); rowLengths.setElement( 1, 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < 3; i++ ) { @@ -624,19 +624,19 @@ void test_SetRow() m.setElement( 1, i, value + 1 ); m.setElement( 2, i, value + 2 ); } - + RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 }; RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 }; RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 }; - + RealType row = 0; IndexType elements = 3; - + m.setRow( row++, colIndexes1, row1, elements ); m.setRow( row++, colIndexes2, row2, elements ); m.setRow( row++, colIndexes3, row3, elements ); - - + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); EXPECT_EQ( m.getElement( 0, 2 ), 11 ); @@ -644,7 +644,7 @@ void test_SetRow() EXPECT_EQ( m.getElement( 0, 4 ), 0 ); EXPECT_EQ( m.getElement( 0, 5 ), 0 ); EXPECT_EQ( m.getElement( 0, 6 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); EXPECT_EQ( m.getElement( 1, 1 ), 22 ); EXPECT_EQ( m.getElement( 1, 2 ), 22 ); @@ -652,7 +652,7 @@ void test_SetRow() EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 1, 5 ), 0 ); EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 0 ); @@ -669,7 +669,7 @@ void test_VectorProduct() using DeviceType = typename 
Matrix::DeviceType; using IndexType = typename Matrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - + /* * Sets up the following 4x4 sparse matrix: * @@ -678,10 +678,10 @@ void test_VectorProduct() * | 0 4 0 0 | * \ 0 0 5 0 / */ - + const IndexType m_rows_1 = 4; const IndexType m_cols_1 = 4; - + Matrix m_1; m_1.reset(); m_1.setDimensions( m_rows_1, m_cols_1 ); @@ -692,37 +692,37 @@ void test_VectorProduct() rowLengths_1.setElement( 2, 1 ); rowLengths_1.setElement( 3, 1 ); m_1.setCompressedRowLengths( rowLengths_1 ); - + RealType value_1 = 1; m_1.setElement( 0, 0, value_1++ ); // 0th row - + m_1.setElement( 1, 1, value_1++ ); // 1st row m_1.setElement( 1, 3, value_1++ ); - + m_1.setElement( 2, 1, value_1++ ); // 2nd row - + m_1.setElement( 3, 2, value_1++ ); // 3rd row - + VectorType inVector_1; inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) inVector_1.setElement( i, 2 ); - VectorType outVector_1; + VectorType outVector_1; outVector_1.setSize( m_rows_1 ); for( IndexType j = 0; j < outVector_1.getSize(); j++ ) outVector_1.setElement( j, 0 ); - - + + m_1.vectorProduct( inVector_1, outVector_1 ); - - + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); - - + + /* * Sets up the following 4x4 sparse matrix: * @@ -731,10 +731,10 @@ void test_VectorProduct() * | 5 6 7 0 | * \ 0 8 0 0 / */ - + const IndexType m_rows_2 = 4; const IndexType m_cols_2 = 4; - + Matrix m_2; m_2.reset(); m_2.setDimensions( m_rows_2, m_cols_2 ); @@ -744,39 +744,39 @@ void test_VectorProduct() rowLengths_2.setElement( 1, 1 ); rowLengths_2.setElement( 3, 1 ); m_2.setCompressedRowLengths( rowLengths_2 ); - + RealType value_2 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_2.setElement( 0, i, value_2++ ); - + m_2.setElement( 
1, 3, value_2++ ); // 1st row - + for( IndexType i = 0; i < 3; i++ ) // 2nd row m_2.setElement( 2, i, value_2++ ); - + for( IndexType i = 1; i < 2; i++ ) // 3rd row m_2.setElement( 3, i, value_2++ ); - + VectorType inVector_2; inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) inVector_2.setElement( i, 2 ); - VectorType outVector_2; + VectorType outVector_2; outVector_2.setSize( m_rows_2 ); for( IndexType j = 0; j < outVector_2.getSize(); j++ ) outVector_2.setElement( j, 0 ); - - + + m_2.vectorProduct( inVector_2, outVector_2 ); - - + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); - - + + /* * Sets up the following 4x4 sparse matrix: * @@ -785,10 +785,10 @@ void test_VectorProduct() * | 7 8 9 0 | * \ 0 10 11 12 / */ - + const IndexType m_rows_3 = 4; const IndexType m_cols_3 = 4; - + Matrix m_3; m_3.reset(); m_3.setDimensions( m_rows_3, m_cols_3 ); @@ -796,40 +796,40 @@ void test_VectorProduct() rowLengths_3.setSize( m_rows_3 ); rowLengths_3.setValue( 3 ); m_3.setCompressedRowLengths( rowLengths_3 ); - + RealType value_3 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_3.setElement( 0, i, value_3++ ); - + for( IndexType i = 1; i < 4; i++ ) m_3.setElement( 1, i, value_3++ ); // 1st row - + for( IndexType i = 0; i < 3; i++ ) // 2nd row m_3.setElement( 2, i, value_3++ ); - + for( IndexType i = 1; i < 4; i++ ) // 3rd row m_3.setElement( 3, i, value_3++ ); - + VectorType inVector_3; inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) inVector_3.setElement( i, 2 ); - VectorType outVector_3; + VectorType outVector_3; outVector_3.setSize( m_rows_3 ); for( IndexType j = 0; j < outVector_3.getSize(); j++ ) outVector_3.setElement( j, 0 ); - - + + 
m_3.vectorProduct( inVector_3, outVector_3 ); - - + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); - - + + /* * Sets up the following 8x8 sparse matrix: * @@ -842,10 +842,10 @@ void test_VectorProduct() * | 26 27 28 29 30 0 0 0 | * \ 31 32 33 34 35 0 0 0 / */ - + const IndexType m_rows_4 = 8; const IndexType m_cols_4 = 8; - + Matrix m_4; m_4.reset(); m_4.setDimensions( m_rows_4, m_cols_4 ); @@ -856,48 +856,48 @@ void test_VectorProduct() rowLengths_4.setElement( 6, 5 ); rowLengths_4.setElement( 7, 5 ); m_4.setCompressedRowLengths( rowLengths_4 ); - + RealType value_4 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_4.setElement( 0, i, value_4++ ); - + m_4.setElement( 0, 5, value_4++ ); - + for( IndexType i = 1; i < 5; i++ ) // 1st row m_4.setElement( 1, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 2nd row m_4.setElement( 2, i, value_4++ ); - + for( IndexType i = 1; i < 5; i++ ) // 3rd row m_4.setElement( 3, i, value_4++ ); - + for( IndexType i = 2; i < 6; i++ ) // 4th row m_4.setElement( 4, i, value_4++ ); - + for( IndexType i = 3; i < 7; i++ ) // 5th row m_4.setElement( 5, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 6th row m_4.setElement( 6, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 7th row m_4.setElement( 7, i, value_4++ ); - + VectorType inVector_4; inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) inVector_4.setElement( i, 2 ); - VectorType outVector_4; + VectorType outVector_4; outVector_4.setSize( m_rows_4 ); for( IndexType j = 0; j < outVector_4.getSize(); j++ ) outVector_4.setElement( j, 0 ); - - + + m_4.vectorProduct( inVector_4, outVector_4 ); - - + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); EXPECT_EQ( outVector_4.getElement( 2 
), 110 ); @@ -906,8 +906,8 @@ void test_VectorProduct() EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); - - + + /* * Sets up the following 8x8 sparse matrix: * @@ -976,7 +976,7 @@ void test_VectorProduct() for( IndexType i = 0; i < inVector_5.getSize(); i++ ) inVector_5.setElement( i, 2 ); - VectorType outVector_5; + VectorType outVector_5; outVector_5.setSize( m_rows_5 ); for( IndexType j = 0; j < outVector_5.getSize(); j++ ) outVector_5.setElement( j, 0 ); @@ -1077,6 +1077,8 @@ void test_RowsReduction() }; m.allRowsReduction( fetch, reduce, keep, 0 ); EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); //// // Compute max norm @@ -1102,7 +1104,7 @@ void test_PerformSORIteration() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 4x4 sparse matrix: * @@ -1111,10 +1113,10 @@ void test_PerformSORIteration() * | 0 1 4 1 | * \ 0 0 1 4 / */ - + const IndexType m_rows = 4; const IndexType m_cols = 4; - + Matrix m; m.reset(); m.setDimensions( m_rows, m_cols ); @@ -1122,54 +1124,54 @@ void test_PerformSORIteration() rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0); - + m.setElement( 1, 0, 1.0 ); // 1st row m.setElement( 1, 1, 4.0 ); m.setElement( 1, 2, 1.0 ); - + m.setElement( 2, 1, 1.0 ); // 2nd row m.setElement( 2, 2, 4.0 ); m.setElement( 2, 3, 1.0 ); - + m.setElement( 3, 2, 1.0 ); // 3rd row m.setElement( 3, 3, 4.0 ); - + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; RealType xVector [ 4 ] = { 1, 1, 1, 1 }; - + IndexType row = 0; RealType omega = 1; - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 1.0 ); 
EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 0.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 0.0 ); @@ -1183,7 +1185,7 @@ void test_OperatorEquals() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) return; else @@ -1229,33 +1231,33 @@ void test_OperatorEquals() m_host.setElement( 0, 4, value++ ); // 0th row m_host.setElement( 0, 5, value++ ); - + m_host.setElement( 1, 1, value++ ); // 1st row m_host.setElement( 1, 3, value++ ); for( IndexType i = 1; i < 3; i++ ) // 2nd row m_host.setElement( 2, i, value++ ); - + m_host.setElement( 2, 4, value++ ); // 2nd row - + for( IndexType i = 1; i < 5; i++ ) // 3rd row m_host.setElement( 3, i, value++ ); m_host.setElement( 4, 1, value++ ); // 4th row - + for( IndexType i = 1; i < 7; i++ ) // 5th row m_host.setElement( 5, i, value++ ); - + for( IndexType i = 0; i < 7; i++ ) // 6th row m_host.setElement( 6, i, value++ ); - + for( IndexType i = 0; i < 8; i++ ) // 7th row m_host.setElement( 7, i, value++ ); - + for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 m_host.setElement( i, 7, 1); - + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); @@ -1264,7 +1266,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); 
EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); @@ -1273,7 +1275,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); @@ -1282,7 +1284,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); @@ -1291,7 +1293,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); @@ -1300,7 +1302,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); @@ -1309,7 +1311,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); @@ -1318,7 +1320,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); EXPECT_EQ( m_host.getElement( 
7, 1 ), 30 ); EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); @@ -1348,7 +1350,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); @@ -1357,7 +1359,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); @@ -1366,7 +1368,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); @@ -1375,7 +1377,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); @@ -1384,7 +1386,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); @@ -1393,7 +1395,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); @@ -1402,7 +1404,7 @@ void test_OperatorEquals() EXPECT_EQ( 
m_host.getElement( 6, 5 ), 27 ); EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); @@ -1411,22 +1413,22 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - + // Try vectorProduct with copied cuda matrix to see if it works correctly. using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; - + VectorType inVector; inVector.setSize( m_cols ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) + for( IndexType i = 0; i < inVector.getSize(); i++ ) inVector.setElement( i, 2 ); - VectorType outVector; + VectorType outVector; outVector.setSize( m_rows ); for( IndexType j = 0; j < outVector.getSize(); j++ ) outVector.setElement( j, 0 ); - + m_cuda.vectorProduct( inVector, outVector ); - + EXPECT_EQ( outVector.getElement( 0 ), 32 ); EXPECT_EQ( outVector.getElement( 1 ), 28 ); EXPECT_EQ( outVector.getElement( 2 ), 56 ); @@ -1444,7 +1446,7 @@ void test_SaveAndLoad( const char* filename ) using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 4x4 sparse matrix: * @@ -1453,10 +1455,10 @@ void test_SaveAndLoad( const char* filename ) * | 6 7 8 0 | * \ 0 9 10 11 / */ - + const IndexType m_rows = 4; const IndexType m_cols = 4; - + Matrix savedMatrix; savedMatrix.reset(); savedMatrix.setDimensions( m_rows, m_cols ); @@ -1464,22 +1466,22 @@ void test_SaveAndLoad( const char* filename ) rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); savedMatrix.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row savedMatrix.setElement( 0, i, value++ ); - + savedMatrix.setElement( 1, 1, value++ ); 
savedMatrix.setElement( 1, 3, value++ ); // 1st row - + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row savedMatrix.setElement( 2, i, value++ ); - + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row savedMatrix.setElement( 3, i, value++ ); - + ASSERT_NO_THROW( savedMatrix.save( filename ) ); - + Matrix loadedMatrix; loadedMatrix.reset(); loadedMatrix.setDimensions( m_rows, m_cols ); @@ -1487,51 +1489,51 @@ void test_SaveAndLoad( const char* filename ) rowLengths2.setSize( m_rows ); rowLengths2.setValue( 3 ); loadedMatrix.setCompressedRowLengths( rowLengths2 ); - - + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); - - + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 
); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); - + EXPECT_EQ( std::remove( filename ), 0 ); } @@ -1541,7 +1543,7 @@ void test_Print() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 sparse matrix: * @@ -1551,10 +1553,10 @@ void test_Print() * | 0 8 9 10 | * \ 0 0 11 12 / */ - + const IndexType m_rows = 5; const IndexType m_cols = 4; - + Matrix m; m.reset(); m.setDimensions( m_rows, m_cols ); @@ -1562,40 +1564,40 @@ void test_Print() rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row m.setElement( 0, i, value++ ); - + m.setElement( 1, 3, value++ ); // 1st row - + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row m.setElement( 3, i, value++ ); - + for( IndexType i = 2; i < m_cols; i++ ) // 4th row m.setElement( 4, i, value++ ); - + #include std::stringstream printed; std::stringstream couted; - + //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + auto old_buf = std::cout.rdbuf(printed.rdbuf()); m.print( std::cout ); //all the std::cout goes to ss 
std::cout.rdbuf(old_buf); //reset - + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" "Row: 1 -> Col:3->4\t\n" "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" "Row: 4 -> Col:2->11 Col:3->12\t\n"; - - + + EXPECT_EQ( printed.str(), couted.str() ); } -- GitLab From 91d38ffa77b55f590c79578cbd0f02cb4c1dcad4 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 11 Dec 2019 18:12:36 +0100 Subject: [PATCH 029/179] Commenting out the code which cannot be compiled. --- src/TNL/Matrices/SparseMatrix.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 3605daaef..0d9ee0b06 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -594,13 +594,13 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + /*auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); return zero; }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/ } @@ -704,7 +704,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >; if( std::is_same< Device, Device2 >::value ) { - 
RowsCapacitiesType rowLengths; + /*RowsCapacitiesType rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setCompressedRowLengths( rowLengths ); // TODO: Replace this with SparseMatrixView @@ -724,7 +724,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 this_values_view[ thisGlobalIdx ] = value; } }; - matrix.forAllRows( f ); + matrix.forAllRows( f );*/ } } -- GitLab From 170d652a6fdd247efecacbac22bd6230f488af1b Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 11 Dec 2019 18:24:21 +0100 Subject: [PATCH 030/179] Adding CSRView. --- src/TNL/Containers/Segments/CSRView.h | 105 +++++++++++ src/TNL/Containers/Segments/CSRView.hpp | 221 ++++++++++++++++++++++++ 2 files changed, 326 insertions(+) create mode 100644 src/TNL/Containers/Segments/CSRView.h create mode 100644 src/TNL/Containers/Segments/CSRView.hpp diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h new file mode 100644 index 000000000..5eeb7ecb3 --- /dev/null +++ b/src/TNL/Containers/Segments/CSRView.h @@ -0,0 +1,105 @@ +/*************************************************************************** + CSRView.h - description + ------------------- + begin : Dec 11, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index > +class CSRView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolderView = typedef Containers::Vector< IndexType, DeviceType, IndexType >::ViewType; + + __cuda_callable__ + CSRView(); + + __cuda_callable__ + CSRView( const OffsetsHolderView& offsets ); + + __cuda_callable__ + CSRView( const CSRView& csr_view ); + + __cuda_callable__ + CSRView( const CSRView&& csr_view ); 
+ + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + /*** + * \brief Returns size of the segment number \r segmentIdx + */ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that needs to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + OffsetsHolderView offsets; +}; + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp new file mode 100644 index 000000000..30ed24071 --- /dev/null +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -0,0 +1,221 @@ +/*************************************************************************** + CSRView.hpp - description + ------------------- + begin : Dec 11, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView() +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const OffsetsHolderView& offsets_view ) + : offsets( offsets_view ) +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView& csr_view ) + : offsets( csr_view.offsest ) +{ + +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView&& csr_view ) + : offsets( std::move( csr_view.offsest ) ) +{ + +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSegmentsCount() const +{ + return this->offsets.getSize() - 1; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + if( ! 
std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +#else + return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); +#endif + } + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSize() const +{ + return this->getStorageSize(); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getStorageSize() const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ this->getSegmentsCount() ]; +#else + return offsets.getElement( this->getSegmentsCount() ); +#endif + } + return offsets[ this->getSegmentsCount() ]; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index > +__cuda_callable__ +void +CSRView< Device, Index >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSRView< Device, Index >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType begin = offsetsView[ segmentIdx ]; + const IndexType end = offsetsView[ segmentIdx + 1 ]; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSRView< Device, Index >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSRView< Device, Index >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSRView< Device, Index >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index > +void +CSRView< Device, Index >:: +save( File& file ) const +{ + file << this->offsets; +} + +template< typename Device, + typename Index > +void +CSRView< Device, Index >:: +load( File& file ) +{ + file >> this->offsets; +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL -- GitLab From 46e99b135c8ebc3bb23765261a6ba7f3ab2287c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 12 Dec 2019 21:44:39 +0100 Subject: [PATCH 031/179] Adding segments views. --- src/Benchmarks/SpMV/spmv.h | 8 +- src/TNL/Containers/Segments/CSR.h | 14 +- src/TNL/Containers/Segments/CSR.hpp | 111 ++++-- src/TNL/Containers/Segments/CSRView.h | 18 +- src/TNL/Containers/Segments/CSRView.hpp | 33 +- src/TNL/Containers/Segments/Ellpack.h | 9 + src/TNL/Containers/Segments/Ellpack.hpp | 80 +++- src/TNL/Containers/Segments/EllpackView.h | 111 ++++++ src/TNL/Containers/Segments/EllpackView.hpp | 293 +++++++++++++++ src/TNL/Containers/Segments/SlicedEllpack.h | 10 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 77 +++- .../Containers/Segments/SlicedEllpackView.h | 116 ++++++ .../Containers/Segments/SlicedEllpackView.hpp | 342 ++++++++++++++++++ src/TNL/Containers/Segments/details/CSR.h | 89 +++++ src/TNL/Containers/Segments/details/Ellpack.h | 107 ++++++ .../Segments/details/SlicedEllpack.h | 106 ++++++ src/TNL/Matrices/SparseMatrix.h | 12 +- src/TNL/Matrices/SparseMatrix.hpp | 90 ++--- .../Containers/Segments/SegmentsTest.hpp | 25 ++ .../SparseMatrixTest_Ellpack_segments.h | 8 +- .../SparseMatrixTest_SlicedEllpack_segments.h | 8 +- 21 files changed, 1521 insertions(+), 146 deletions(-) create mode 100644 src/TNL/Containers/Segments/EllpackView.h create mode 100644 src/TNL/Containers/Segments/EllpackView.hpp create mode 100644 src/TNL/Containers/Segments/SlicedEllpackView.h create mode 100644 src/TNL/Containers/Segments/SlicedEllpackView.hpp create mode 100644 
src/TNL/Containers/Segments/details/CSR.h create mode 100644 src/TNL/Containers/Segments/details/Ellpack.h create mode 100644 src/TNL/Containers/Segments/details/SlicedEllpack.h diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index a6acb52fd..66f4fb236 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -45,14 +45,14 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; template< typename Real, typename Device, typename Index > using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >; -template< typename Device, typename Index > -using EllpackSegments = Containers::Segments::Ellpack< Device, Index >; +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >; -template< typename Device, typename Index > -using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index >; +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >; diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index ecd1de983..b83e43f1d 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -10,22 +10,28 @@ #pragma once +#include + #include +#include namespace TNL { namespace Containers { namespace Segments { template< typename Device, - typename Index > + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { public: 
using DeviceType = Device; using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; using SegmentsSizes = OffsetsHolder; + using ViewType = CSRView< Device, Index >; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; CSR(); @@ -41,6 +47,10 @@ class CSR template< typename SizesHolder = OffsetsHolder > void setSegmentsSizes( const SizesHolder& sizes ); + ViewType getView(); + + ConstViewType getConstView() const; + /** * \brief Number segments. */ diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ccb483125..a8f12e7dc 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace TNL { namespace Containers { @@ -20,64 +21,92 @@ namespace TNL { template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR() { } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const SegmentsSizes& segmentsSizes ) { this->setSegmentsSizes( segmentsSizes ); } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const CSR& csr ) : offsets( csr.offsets ) { } template< typename Device, - typename Index > -CSR< Device, Index >:: + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) { } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename SizesHolder > void -CSR< Device, Index >:: +CSR< Device, Index, 
IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { - this->offsets.setSize( sizes.getSize() + 1 ); + details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); + /*this->offsets.setSize( sizes.getSize() + 1 ); auto view = this->offsets.getView( 0, sizes.getSize() ); view = sizes; this->offsets.setElement( sizes.getSize(), 0 ); - this->offsets.template scan< Algorithms::ScanType::Exclusive >(); + this->offsets.template scan< Algorithms::ScanType::Exclusive >();*/ } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ViewType +CSR< Device, Index, IndexAllocator >:: +getView() +{ + return ViewType( this->offsets.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ConstViewType +CSR< Device, Index, IndexAllocator >:: +getConstView() const +{ + return ConstViewType( this->offsets.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentsCount() const { return this->offsets.getSize() - 1; } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const { if( ! 
std::is_same< DeviceType, Devices::Host >::value ) @@ -92,20 +121,22 @@ getSegmentSize( const IndexType segmentIdx ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSize() const { return this->getStorageSize(); } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getStorageSize() const { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -120,10 +151,11 @@ getStorageSize() const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ Index -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -138,19 +170,21 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > __cuda_callable__ void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Function, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getConstView(); @@ -166,20 +200,22 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator> template< typename Function, typename... 
Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -196,28 +232,31 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: save( File& file ) const { file << this->offsets; } template< typename Device, - typename Index > + typename Index, + typename IndexAllocator > void -CSR< Device, Index >:: +CSR< Device, Index, IndexAllocator >:: load( File& file ) { file >> this->offsets; diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 5eeb7ecb3..2f8957970 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -10,6 +10,8 @@ #pragma once +#include + #include namespace TNL { @@ -24,13 +26,19 @@ class CSRView using DeviceType = Device; using IndexType = Index; - using OffsetsHolderView = typedef Containers::Vector< IndexType, DeviceType, IndexType >::ViewType; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType; + using ViewType = CSRView; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; __cuda_callable__ CSRView(); __cuda_callable__ - CSRView( const OffsetsHolderView& offsets ); + CSRView( const OffsetsView&& offsets ); + + __cuda_callable__ + CSRView( const ConstOffsetsView&& offsets ); __cuda_callable__ CSRView( const CSRView& csr_view ); @@ -38,6 +46,10 @@ class CSRView __cuda_callable__ CSRView( const CSRView&& csr_view ); + ViewType getView(); + + ConstViewType getConstView() const; + /** * \brief Number segments. 
*/ @@ -96,7 +108,7 @@ class CSRView protected: - OffsetsHolderView offsets; + OffsetsView offsets; }; } // namespace Segements } // namespace Conatiners diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 30ed24071..f50a74985 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -31,7 +31,7 @@ template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: -CSRView( const OffsetsHolderView& offsets_view ) +CSRView( const OffsetsView&& offsets_view ) : offsets( offsets_view ) { } @@ -40,10 +40,18 @@ template< typename Device, typename Index > __cuda_callable__ CSRView< Device, Index >:: -CSRView( const CSRView& csr_view ) - : offsets( csr_view.offsest ) +CSRView( const ConstOffsetsView&& offsets_view ) + : offsets( offsets_view ) { +} +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView& csr_view ) + : offsets( csr_view.offsets ) +{ } template< typename Device, @@ -51,9 +59,26 @@ template< typename Device, __cuda_callable__ CSRView< Device, Index >:: CSRView( const CSRView&& csr_view ) - : offsets( std::move( csr_view.offsest ) ) + : offsets( std::move( csr_view.offsets ) ) +{ +} + +template< typename Device, + typename Index > +typename CSRView< Device, Index >::ViewType +CSRView< Device, Index >:: +getView() { + return ViewType( this->offsets ); +} +template< typename Device, + typename Index > +typename CSRView< Device, Index >::ConstViewType +CSRView< Device, Index >:: +getConstView() const +{ + return ConstViewType( this->offsets.getConstView() ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index b08ad0f04..9c81a8428 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -11,6 +11,7 @@ #pragma once #include +#include namespace TNL { namespace Containers { @@ 
-18,6 +19,7 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int Alignment = 32 > class Ellpack @@ -30,6 +32,9 @@ class Ellpack static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; + using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; + //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; + Ellpack(); @@ -41,6 +46,10 @@ class Ellpack Ellpack( const Ellpack&& segments ); + ViewType getView(); + + //ConstViewType getConstView() const; + /** * \brief Set sizes of particular segments. */ diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 337009e99..482c87d4f 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -21,9 +21,10 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack() : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -31,9 +32,10 @@ Ellpack() template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const SegmentsSizes& segmentsSizes ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -42,9 +44,10 @@ Ellpack( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< 
Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -53,9 +56,10 @@ Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const Ellpack& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -63,9 +67,10 @@ Ellpack( const Ellpack& ellpack ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: Ellpack( const Ellpack&& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -73,11 +78,35 @@ Ellpack( const Ellpack&& ellpack ) template< typename Device, typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +/*template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ConstViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +}*/ + +template< typename Device, + typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename SizesHolder > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, 
Index, IndexAllocator, RowMajorOrder, Alignment >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); @@ -90,10 +119,11 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) { this->segmentSize = segmentSize; @@ -107,11 +137,12 @@ setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentsCount() const { return this->size; @@ -119,11 +150,12 @@ getSegmentsCount() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const { return this->segmentSize; @@ -131,11 +163,12 @@ getSegmentSize( const IndexType segmentIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSize() const { return this->size * this->segmentSize; @@ -144,11 +177,12 @@ getSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, 
Alignment >:: getStorageSize() const { return this->alignedSize * this->segmentSize; @@ -156,11 +190,12 @@ getStorageSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ Index -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { if( RowMajorOrder ) @@ -171,22 +206,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > __cuda_callable__ void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { if( RowMajorOrder ) @@ -220,11 +257,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); @@ -232,11 +270,12 @@ forAll( Function& f, Args... 
args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { if( RowMajorOrder ) @@ -272,11 +311,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -284,10 +324,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: save( File& file ) const { file.save( &segmentSize ); @@ -297,10 +338,11 @@ save( File& file ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int Alignment > void -Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: load( File& file ) { file.load( &segmentSize ); diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h new file mode 100644 index 000000000..adbfee629 --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -0,0 +1,111 @@ +/*************************************************************************** + EllpackView.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +#include + + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class EllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + using ViewType = EllpackView; + //using ConstViewType = EllpackView< Device, std::add_const_t< 
Index > >; + + __cuda_callable__ + EllpackView(); + + __cuda_callable__ + EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ); + + __cuda_callable__ + EllpackView( const EllpackView& ellpackView ); + + __cuda_callable__ + EllpackView( const EllpackView&& ellpackView ); + + ViewType getView(); + + //ConstViewType getConstView() const; + + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp new file mode 100644 index 000000000..d124633ff --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -0,0 +1,293 @@ +/*************************************************************************** + EllpackView.hpp - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView() + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ) + : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( const EllpackView& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( const 
EllpackView&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +/*template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +}*/ + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentsCount() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSize() const +{ + return this->size * this->segmentSize; +} + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getStorageSize() const +{ + return this->alignedSize * this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( RowMajorOrder ) + return segmentIdx * 
this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx; + const IndexType end = storageSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSize(), f, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + if( RowMajorOrder ) + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j += alignedSize ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +save( File& file ) const +{ + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +load( File& file ) +{ + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index ecc2c8c7e..fc514c51f 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -11,6 +11,7 @@ #pragma once #include +#include namespace TNL { namespace Containers { @@ -18,6 +19,7 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int SliceSize = 32 > class SlicedEllpack @@ -26,9 +28,11 @@ class SlicedEllpack using DeviceType = Device; using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; static constexpr int getSliceSize() { return SliceSize; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; SlicedEllpack(); @@ -38,6 +42,10 @@ class SlicedEllpack SlicedEllpack( const SlicedEllpack&& segments ); + ViewType getView(); + + ConstViewType getConstView() const; + /** 
* \brief Set sizes of particular segments. */ diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index d721edb00..bdf28ff73 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -22,9 +22,10 @@ namespace TNL { template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -32,9 +33,10 @@ SlicedEllpack() template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -43,9 +45,10 @@ SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -55,9 +58,10 @@ SlicedEllpack( const SlicedEllpack& slicedEllpack ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: SlicedEllpack( const SlicedEllpack&& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( 
slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -67,11 +71,36 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack ) template< typename Device, typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getView() +{ + return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ConstViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getConstView() const +{ + return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename SizesHolder > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentsCount = sizes.getSize(); @@ -79,7 +108,7 @@ setSegmentsSizes( const SizesHolder& sizes ) this->sliceOffsets.setSize( slicesCount + 1 ); this->sliceOffsets = 0; this->sliceSegmentSizes.setSize( slicesCount ); - Ellpack< DeviceType, IndexType, true > ellpack; + Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack; ellpack.setSegmentsSizes( slicesCount, SliceSize ); const IndexType _size = sizes.getSize(); @@ -106,11 +135,12 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index 
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentsCount() const { return this->segmentsCount; @@ -118,11 +148,12 @@ getSegmentsCount() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentSize( const IndexType segmentIdx ) const { const Index sliceIdx = segmentIdx / SliceSize; @@ -140,11 +171,12 @@ getSegmentSize( const IndexType segmentIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSize() const { return this->size; @@ -152,11 +184,12 @@ getSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getStorageSize() const { return this->alignedSize; @@ -164,11 +197,12 @@ getStorageSize() const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ Index -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -197,22 +231,24 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > __cuda_callable__ void 
-SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); @@ -251,11 +287,12 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -263,11 +300,12 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -307,11 +345,12 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); @@ -319,10 +358,11 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: save( File& file ) const { file.save( &size ); @@ -334,10 +374,11 @@ save( File& file ) const template< typename Device, typename Index, + typename IndexAllocator, bool RowMajorOrder, int SliceSize > void -SlicedEllpack< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: load( File& file ) { file.load( &size ); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h new file mode 100644 index 000000000..275baacf5 --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -0,0 +1,116 @@ +/*************************************************************************** + SlicedEllpackView.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using ViewType = SlicedEllpackView; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; + + __cuda_callable__ + SlicedEllpackView(); + + __cuda_callable__ + SlicedEllpackView( IndexType size, + IndexType alignedSize, + IndexType segmentsCount, + OffsetsView&& sliceOffsets, + OffsetsView&& sliceSegmentSizes ); + + __cuda_callable__ + SlicedEllpackView( const SlicedEllpackView& slicedEllpackView ); + + __cuda_callable__ + SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ); + + ViewType getView(); + + ConstViewType getConstView() const; + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSize() const; + + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. 
+ * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size, alignedSize, segmentsCount; + + OffsetsView sliceOffsets, sliceSegmentSizes; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp new file mode 100644 index 000000000..f2e03bd38 --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -0,0 +1,342 @@ +/*************************************************************************** + SlicedEllpackView.hpp - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder, + 
int SliceSize > +__cuda_callable__ +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView() + : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView( IndexType size, + IndexType alignedSize, + IndexType segmentsCount, + OffsetsView&& sliceOffsets, + OffsetsView&& sliceSegmentSizes ) + : size( size ), alignedSize( alignedSize ), segmentsCount( segmentsCount ), + sliceOffsets( std::forward< OffsetsView >( sliceOffsets ) ), sliceSegmentSizes( std::forward< OffsetsView >( sliceSegmentSizes ) ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView( const SlicedEllpackView& slicedEllpackView ) + : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ), + segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ), + sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ) + : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ), + segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ), + sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getView() +{ + return ViewType( size, alignedSize, segmentsCount, 
sliceOffsets, sliceSegmentSizes ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getConstView() const +{ + return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentsCount() const +{ + return this->segmentsCount; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + const Index sliceIdx = segmentIdx / SliceSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + return this->sliceSegmentSizes[ sliceIdx ]; + else + { +#ifdef __CUDA_ARCH__ + return this->sliceSegmentSizes[ sliceIdx ]; +#else + return this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSize() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getStorageSize() const +{ + return this->alignedSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType 
segmentInSliceIdx = segmentIdx % SliceSize; + IndexType sliceOffset, segmentSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + { + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + } + else + { +#ifdef __CUDA__ARCH__ + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; +#else + sliceOffset = this->sliceOffsets.getElement( sliceIdx ); + segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } + if( RowMajorOrder ) + return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; + else + return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... 
) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) + reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + RealType aux( zero ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +save( File& file ) const +{ + file.save( &size ); + file.save( &alignedSize ); + file.save( &segmentsCount ); + file << this->sliceOffsets; + file << this->sliceSegmentSizes; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +load( File& file ) +{ + file.load( &size ); + file.load( &alignedSize ); + file.load( &segmentsCount ); + file >> this->sliceOffsets; + file >> this->sliceSegmentSizes; +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h new file mode 100644 index 000000000..47e768d28 --- /dev/null +++ b/src/TNL/Containers/Segments/details/CSR.h @@ -0,0 +1,89 @@ +/*************************************************************************** + CSR.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + + +namespace TNL { + namespace Containers { + namespace Segments { + namespace details { + +template< typename Device, + typename Index > +class CSR +{ + public: + + using DeviceType = Device; + using IndexType = Index; + + template< typename SizesHolder, typename CSROffsets > + static void setSegmentsSizes( const SizesHolder& sizes, CSROffsets& offsets ) + { + offsets.setSize( sizes.getSize() + 1 ); + auto view = offsets.getView( 0, sizes.getSize() ); + view = sizes; + offsets.setElement( sizes.getSize(), 0 ); + offsets.template scan< Algorithms::ScanType::Exclusive >(); + } + + /*** + * \brief Returns size of the segment number \r segmentIdx + 
*/ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that needs to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; +}; + } // namespace details + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h new file mode 100644 index 000000000..b08ad0f04 --- /dev/null +++ b/src/TNL/Containers/Segments/details/Ellpack.h @@ -0,0 +1,107 @@ +/*************************************************************************** + Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class Ellpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + + Ellpack(); + + Ellpack( const SegmentsSizes& sizes ); + + Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); + + Ellpack( const Ellpack& segments ); + + Ellpack( const Ellpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); + /** + * \brief Number segments. 
+ */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h new file mode 100644 index 000000000..ecc2c8c7e --- /dev/null +++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h @@ -0,0 +1,106 @@ +/*************************************************************************** + SlicedEllpack.h - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + + SlicedEllpack(); + + SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + SlicedEllpack( const SlicedEllpack& segments ); + + SlicedEllpack( const SlicedEllpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Number segments. 
+ */ + __cuda_callable__ + IndexType getSize() const; + + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size, alignedSize, segmentsCount; + + OffsetsHolder sliceOffsets, sliceSegmentSizes; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index b6a618e10..1512f8574 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -17,7 +17,7 @@ namespace TNL { namespace Matrices { template< typename Real, - template< typename Device_, typename Index_ > class Segments, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments, typename Device = Devices::Host, typename Index = int, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, @@ -27,9 +27,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > public: using RealType = Real; - template< typename Device_, typename Index_ > - using SegmentsTemplate = Segments< Device_, Index_ >; - using SegmentsType = Segments< Device, Index >; + template< typename Device_, typename Index_, typename IndexAllocator_ > + using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; + using SegmentsType = Segments< Device, Index, IndexAllocator >; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; @@ -77,7 +77,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; - template< typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > void setLike( const SparseMatrix< Real2, Segments2, 
Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); IndexType getNumberOfNonzeroMatrixElements() const; @@ -191,7 +191,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > // cross-device copy assignment template< typename Real2, - template< typename, typename > class Segments2, + template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 0d9ee0b06..e24ed2f44 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -18,7 +18,7 @@ namespace TNL { namespace Matrices { template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -31,7 +31,7 @@ SparseMatrix( const RealAllocatorType& realAllocator, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -43,7 +43,7 @@ SparseMatrix( const SparseMatrix& m ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -55,7 +55,7 @@ SparseMatrix( const SparseMatrix&& m ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -70,7 +70,7 @@ SparseMatrix( const IndexType rows, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -86,7 +86,7 @@ getSerializationType() } template< typename Real, - template< typename, typename > class 
Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -99,7 +99,7 @@ getSerializationTypeVirtual() const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -117,7 +117,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -143,7 +143,7 @@ getCompressedRowLengths( Vector& rowLengths ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -156,7 +156,7 @@ getRowLength( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -170,7 +170,7 @@ getRowLengthFast( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -183,7 +183,7 @@ getNonZeroRowLength( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -197,12 +197,12 @@ getNonZeroRowLengthFast( const IndexType row ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > - template< 
typename Real2, template< typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) @@ -211,7 +211,7 @@ setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -229,7 +229,7 @@ getNumberOfNonzeroMatrixElements() const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -244,7 +244,7 @@ reset() } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -260,7 +260,7 @@ setElementFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -275,7 +275,7 @@ setElement( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -292,7 +292,7 @@ addElementFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, 
typename RealAllocator, @@ -358,7 +358,7 @@ addElement( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -374,7 +374,7 @@ setRowFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -403,7 +403,7 @@ setRow( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -421,7 +421,7 @@ addRowFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -439,7 +439,7 @@ addRow( const IndexType row, template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -454,7 +454,7 @@ getElementFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -477,7 +477,7 @@ getElement( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -493,7 +493,7 @@ getRowFast( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -509,7 +509,7 @@ 
rowVectorProduct( const IndexType row, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -544,7 +544,7 @@ vectorProduct( const InVector& inVector, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -567,7 +567,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -581,7 +581,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -605,7 +605,7 @@ forRows( IndexType first, IndexType last, Function& function ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -619,7 +619,7 @@ forAllRows( Function& function ) const } /*template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -635,7 +635,7 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -650,7 +650,7 @@ getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, }*/ 
template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -668,7 +668,7 @@ performSORIteration( const Vector1& b, // copy assignment template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -686,13 +686,13 @@ operator=( const SparseMatrix& matrix ) // cross-device copy assignment template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, typename IndexAllocator > template< typename Real2, - template< typename, typename > class Segments2, + template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, @@ -729,7 +729,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -744,7 +744,7 @@ save( File& file ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -759,7 +759,7 @@ load( File& file ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -772,7 +772,7 @@ save( const String& fileName ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -785,7 +785,7 @@ load( 
const String& fileName ) } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, @@ -811,7 +811,7 @@ print( std::ostream& str ) const } template< typename Real, - template< typename, typename > class Segments, + template< typename, typename, typename > class Segments, typename Device, typename Index, typename RealAllocator, diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index acc75655f..5e74f96b0 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -52,6 +52,16 @@ void test_SetSegmentsSizes_EqualSizes() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); } template< typename Segments > @@ -89,6 +99,16 @@ void test_SetSegmentsSizes_EqualSizes_EllpackOnly() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); } template< typename Segments > @@ -136,6 +156,11 @@ void 
test_AllReduction_MaximumInSegments() for( IndexType i = 0; i < segmentsCount; i++ ) EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); + + result_view = 0; + segments.getView().allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); } #endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 2c0514c0a..16c22d9ca 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -28,14 +28,14 @@ protected: //// // Row-major format is used for the host system -template< typename Device, typename Index > -using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >; +template< typename Device, typename Index, typename IndexAlocator > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; //// // Column-major format is used for GPUs -template< typename Device, typename Index > -using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 5efcb1eae..8597121e4 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -28,14 +28,14 @@ protected: //// // Row-major format is used for the host system -template< typename Device, typename Index > -using 
RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; //// // Column-major format is used for GPUs -template< typename Device, typename Index > -using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >; +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types -- GitLab From 482a96529145bae8d6902544ac0fc834f4fa7cc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 12 Dec 2019 22:15:49 +0100 Subject: [PATCH 032/179] Refactoring segments. --- src/TNL/Containers/Segments/CSR.hpp | 25 +---------- src/TNL/Containers/Segments/CSRView.hpp | 21 ++-------- src/TNL/Containers/Segments/details/CSR.h | 41 +++++++++++++++---- src/TNL/Containers/Segments/details/Ellpack.h | 2 - .../Segments/details/SlicedEllpack.h | 2 - 5 files changed, 37 insertions(+), 54 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index a8f12e7dc..280ed6ebf 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -63,11 +63,6 @@ CSR< Device, Index, IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); - /*this->offsets.setSize( sizes.getSize() + 1 ); - auto view = this->offsets.getView( 0, sizes.getSize() ); - view = sizes; - this->offsets.setElement( sizes.getSize(), 0 ); - this->offsets.template scan< Algorithms::ScanType::Exclusive >();*/ } template< typename Device, @@ -109,15 +104,7 @@ Index CSR< Device, 
Index, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const { - if( ! std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; -#else - return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); -#endif - } - return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; + return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); } template< typename Device, @@ -139,15 +126,7 @@ Index CSR< Device, Index, IndexAllocator >:: getStorageSize() const { - if( ! std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ this->getSegmentsCount() ]; -#else - return offsets.getElement( this->getSegmentsCount() ); -#endif - } - return offsets[ this->getSegmentsCount() ]; + return details::CSR< Device, Index >::getStorageSize( this->offsets ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index f50a74985..dd4c434ba 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace TNL { namespace Containers { @@ -98,15 +99,7 @@ Index CSRView< Device, Index >:: getSegmentSize( const IndexType segmentIdx ) const { - if( ! std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; -#else - return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); -#endif - } - return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; + return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); } template< typename Device, @@ -126,15 +119,7 @@ Index CSRView< Device, Index >:: getStorageSize() const { - if( ! 
std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ this->getSegmentsCount() ]; -#else - return offsets.getElement( this->getSegmentsCount() ); -#endif - } - return offsets[ this->getSegmentsCount() ]; + return details::CSR< Device, Index >::getStorageSize( this->offsets ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h index 47e768d28..38f097669 100644 --- a/src/TNL/Containers/Segments/details/CSR.h +++ b/src/TNL/Containers/Segments/details/CSR.h @@ -35,23 +35,48 @@ class CSR offsets.template scan< Algorithms::ScanType::Exclusive >(); } - /*** - * \brief Returns size of the segment number \r segmentIdx - */ + template< typename CSROffsets > __cuda_callable__ - IndexType getSegmentSize( const IndexType segmentIdx ) const; + static IndexType getSegmentsCount( const CSROffsets& offsets ) + { + return offsets.getSize() - 1; + } /*** - * \brief Returns number of elements managed by all segments. + * \brief Returns size of the segment number \r segmentIdx */ + template< typename CSROffsets > __cuda_callable__ - IndexType getSize() const; + static IndexType getSegmentSize( const CSROffsets& offsets, const IndexType segmentIdx ) + { + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +#else + return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); +#endif + } + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; + } /*** * \brief Returns number of elements that needs to be allocated. */ + template< typename CSROffsets > __cuda_callable__ - IndexType getStorageSize() const; + static IndexType getStorageSize( const CSROffsets& offsets ) + { + if( ! 
std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ getSegmentsCount( offsets ) ]; +#else + return offsets.getElement( getSegmentsCount( offsets ) ); +#endif + } + return offsets[ getSegmentsCount( offsets ) ]; + } __cuda_callable__ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; @@ -85,5 +110,3 @@ class CSR } // namespace Segements } // namespace Conatiners } // namespace TNL - -#include diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h index b08ad0f04..ecfe63107 100644 --- a/src/TNL/Containers/Segments/details/Ellpack.h +++ b/src/TNL/Containers/Segments/details/Ellpack.h @@ -103,5 +103,3 @@ class Ellpack } // namespace Segements } // namespace Conatiners } // namespace TNL - -#include diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h index ecc2c8c7e..6f185bc46 100644 --- a/src/TNL/Containers/Segments/details/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h @@ -102,5 +102,3 @@ class SlicedEllpack } // namespace Segements } // namespace Conatiners } // namespace TNL - -#include -- GitLab From a87203a0b91da16f5e16b379e33ee09a8e2a1c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 22 Dec 2019 20:52:43 +0100 Subject: [PATCH 033/179] Implementing sparse matrix assignment. 
--- src/TNL/Containers/Segments/CSR.h | 6 +- src/TNL/Containers/Segments/CSR.hpp | 11 +++ src/TNL/Containers/Segments/Ellpack.h | 5 ++ src/TNL/Containers/Segments/Ellpack.hpp | 15 ++++ src/TNL/Containers/Segments/SlicedEllpack.h | 5 ++ src/TNL/Containers/Segments/SlicedEllpack.hpp | 17 ++++ src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 84 +++++++++++++++---- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 32 +++++-- 9 files changed, 156 insertions(+), 21 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index b83e43f1d..add07f1df 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -93,7 +93,6 @@ class CSR template< typename Function, typename... Args > void forAll( Function& f, Args... args ) const; - /*** * \brief Go over all segments and perform a reduction in each of them. */ @@ -103,6 +102,11 @@ class CSR template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + CSR& operator=( const CSR& rhsSegments ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_ > + CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 280ed6ebf..61720869c 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -221,6 +221,17 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename Device_, typename Index_, typename IndexAllocator_ > +CSR< Device, Index, IndexAllocator >& +CSR< Device, Index, IndexAllocator >:: +operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) +{ + this->offsets = source.offsets; +} + template< typename Device, typename Index, typename IndexAllocator > diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 9c81a8428..b9b3e63c1 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -100,6 +100,11 @@ class Ellpack template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + Ellpack& operator=( const Ellpack& source ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > + Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 482c87d4f..97d30d314 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -322,6 +322,21 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >& +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ) +{ + this->segmentSize = source.segmentSize; + this->size = source.size; + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index fc514c51f..9c2e7157f 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -96,6 +96,11 @@ class SlicedEllpack template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + SlicedEllpack& operator=( const SlicedEllpack& source ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > + SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index bdf28ff73..ad83f666a 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -356,6 +356,23 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >& +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ) +{ + this->size = source.size; + this->alignedSize = source.alignedSize; + this->segmentsCount = source.segmentsCount; + this->sliceOffsets = source.sliceOffsets; + this->sliceSegmentSizes = source.sliceSegmentSizes; +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 1512f8574..8c8fef599 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -218,7 +218,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > SegmentsType segments; - IndexAllocator indexAlloctor; + IndexAllocator indexAllocator; RealAllocator realAllocator; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index e24ed2f44..5de4473ab 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -680,7 +680,7 @@ operator=( const SparseMatrix& matrix ) Matrix< Real, Device, Index >::operator=( matrix ); this->columnIndexes = matrix.columnIndexes; this->segments = matrix.segments; - this->indexAlloctor = matrix.indexAllocator; + this->indexAllocator = matrix.indexAllocator; this->realAllocator = matrix.realAllocator; } @@ -702,29 +702,85 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, 
IndexAllocator2 >; + RowsCapacitiesType rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setCompressedRowLengths( rowLengths ); + + // TODO: Replace this with SparseMatrixView + const auto matrix_columns_view = matrix.columnIndexes.getConstView(); + const auto matrix_values_view = matrix.values.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto this_columns_view = this->columnIndexes.getView(); + auto this_values_view = this->values.getView(); + if( std::is_same< Device, Device2 >::value ) { - /*RowsCapacitiesType rowLengths; - matrix.getCompressedRowLengths( rowLengths ); - this->setCompressedRowLengths( rowLengths ); - // TODO: Replace this with SparseMatrixView - const auto matrix_columns_view = matrix.columnIndexes.getConstView(); - const auto matrix_values_view = matrix.values.getConstView(); - const auto segments_view = this->segments.getConstView(); - auto this_columns_view = this->columnIndexes.getView(); - auto this_values_view = this->values.getView(); - const IndexType paddingIndex = this->getPaddingIndex(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) { + const auto this_segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { const IndexType column = matrix_columns_view[ globalIdx ]; if( column != paddingIndex ) { const RealType value = matrix_values_view[ globalIdx ]; - IndexType thisGlobalIdx = segments_view.getGlobalIdx( rowIdx, localIdx ); + IndexType thisGlobalIdx = this_segments_view.getGlobalIndex( rowIdx, localIdx ); this_columns_view[ thisGlobalIdx ] = column; this_values_view[ thisGlobalIdx ] = value; } }; - matrix.forAllRows( f );*/ + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< Real2, 
Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize ); + Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = paddingIndex; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { + const IndexType column = matrix_columns_view[ globalIdx ]; + if( column != paddingIndex ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ]; + matrixColumnsBuffer_view[ bufferIdx ] = column; + } + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; + if( column != paddingIndex ) + { + this_columns_view[ globalIdx ] = column; + this_values_view[ globalIdx ] = thisValuesBuffer_view[ bufferIdx ]; + } + }; + 
this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } } } diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 9b09ef4d4..684a6a871 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -12,12 +12,31 @@ #include #include -using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; +#include +#include +#include +#include + +/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; -using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >; +using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/ + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >; +using E_host = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Host, int >; +using E_cuda = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Cuda, int >; +using SE_host = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Host, int >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Cuda, int >; + #ifdef HAVE_GTEST #include @@ -388,7 
+407,8 @@ void testConversion() checkTriDiagMatrix( triDiag1 ); Matrix2 triDiag2; - TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); + //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); + triDiag2 = triDiag1; checkTriDiagMatrix( triDiag2 ); } @@ -400,7 +420,8 @@ void testConversion() checkAntiTriDiagMatrix( antiTriDiag1 ); Matrix2 antiTriDiag2; - TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); + //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); + antiTriDiag2 = antiTriDiag1; checkAntiTriDiagMatrix( antiTriDiag2 ); } @@ -411,7 +432,8 @@ void testConversion() checkUnevenRowSizeMatrix( unevenRowSize1 ); Matrix2 unevenRowSize2; - TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); + //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); + unevenRowSize2 = unevenRowSize1; checkUnevenRowSizeMatrix( unevenRowSize2 ); } } -- GitLab From 127b3bc9b6108aba737b64368d9d3c16e941f556 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 22 Dec 2019 23:40:13 +0100 Subject: [PATCH 034/179] Fixing sparse matrix assignment operator. --- src/TNL/Containers/Segments/SlicedEllpack.hpp | 4 +-- src/TNL/Matrices/SparseMatrix.h | 8 ++++- src/TNL/Matrices/SparseMatrix.hpp | 29 ++++++++++++------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index ad83f666a..c9c1d8560 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -255,7 +255,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { - auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; @@ -270,7 +270,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } else { - auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 8c8fef599..44ded93a6 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -62,7 +62,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > virtual String getSerializationTypeVirtual() const; - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + template< typename RowsCapacitiesVector > + void setCompressedRowLengths( const RowsCapacitiesVector& rowCapacities ); + + // TODO: Remove this when possible + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { + this->setCompressedRowLengths( rowLengths ); + }; template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 5de4473ab..964e9eb22 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -104,12 +104,21 @@ template< typename Real, typename Index, typename RealAllocator, typename IndexAllocator > + template< typename RowsCapacitiesVector > void SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) +setCompressedRowLengths( const 
RowsCapacitiesVector& rowsCapacities ) { - TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); - this->segments.setSegmentsSizes( rowLengths ); + TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); + using RowsCapacitiesVectorDevice = typename RowsCapacitiesVector::DeviceType; + if( std::is_same< DeviceType, RowsCapacitiesVectorDevice >::value ) + this->segments.setSegmentsSizes( rowsCapacities ); + else + { + RowsCapacitiesType thisRowsCapacities; + thisRowsCapacities = rowsCapacities; + this->segments.setSegmentsSizes( thisRowsCapacities ); + } this->values.setSize( this->segments.getStorageSize() ); this->values = ( RealType ) 0; this->columnIndexes.setSize( this->segments.getStorageSize() ); @@ -594,13 +603,11 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - /*auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { - IndexType columnIdx = columns_view[ globalIdx ]; - if( columnIdx != paddingIndex_ ) - return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); - return zero; + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, localIdx, globalIdx ); + return true; }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/ + this->segments.forSegments( first, last, f ); } @@ -702,8 +709,9 @@ SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) { using RHSMatrixType = 
SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >; - RowsCapacitiesType rowLengths; + typename RHSMatrixType::RowsCapacitiesType rowLengths; matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); this->setCompressedRowLengths( rowLengths ); // TODO: Replace this with SparseMatrixView @@ -712,6 +720,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 const IndexType paddingIndex = this->getPaddingIndex(); auto this_columns_view = this->columnIndexes.getView(); auto this_values_view = this->values.getView(); + this_columns_view = paddingIndex; if( std::is_same< Device, Device2 >::value ) { -- GitLab From 5a68640d3ba8d308c3096cb55024acd2268dff38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 23 Dec 2019 16:15:02 +0100 Subject: [PATCH 035/179] Fixed sparse matrix assignment operator. --- src/TNL/Containers/Segments/SlicedEllpackView.hpp | 2 +- src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 8 +++++++- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 1 - 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index f2e03bd38..66cfce195 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -171,7 +171,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const } else { -#ifdef __CUDA__ARCH__ +#ifdef __CUDA_ARCH__ sliceOffset = this->sliceOffsets[ sliceIdx ]; segmentSize = this->sliceSegmentSizes[ sliceIdx ]; #else diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 44ded93a6..a5effce93 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -34,7 +34,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using IndexType 
= Index; using RealAllocatorType = RealAllocator; using IndexAllocatorType = IndexAllocator; - using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType >; + using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 964e9eb22..75f505f5f 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -739,8 +739,9 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 } else { + //std::cerr << "Matrix = " << std::endl << matrix << std::endl; const IndexType maxRowLength = max( rowLengths ); - const IndexType bufferRowsCount( 128 ); + const IndexType bufferRowsCount( 8 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< Real2, Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize ); Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize ); @@ -757,6 +758,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); thisColumnsBuffer = paddingIndex; + matrixColumnsBuffer_view = paddingIndex; //// // Copy matrix elements into buffer @@ -765,12 +767,15 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 if( column != paddingIndex ) { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + //printf( ">>>RowIdx = %d GlobalIdx = %d column = %d bufferIdx = %d \n", rowIdx, globalIdx, column, bufferIdx ); matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ]; 
matrixColumnsBuffer_view[ bufferIdx ] = column; } }; matrix.forRows( baseRow, lastRow, f1 ); + //std::cerr << "Values = " << matrixValuesBuffer_view << std::endl; + //std::cerr << "Columns = " << matrixColumnsBuffer_view << std::endl; //// // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; @@ -790,6 +795,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } + //std::cerr << "This matrix = " << std::endl << *this << std::endl; } } diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 684a6a871..e3fb505d3 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -438,7 +438,6 @@ void testConversion() } } - TEST( SparseMatrixCopyTest, CSR_HostToHost ) { testCopyAssignment< CSR_host, CSR_host >(); -- GitLab From 2e1ddabef943ff457f13eb339509bf14fe7c9aee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 27 Dec 2019 20:31:39 +0100 Subject: [PATCH 036/179] Fixed sparse matrix assignment operator. 
--- src/TNL/Matrices/SparseMatrix.hpp | 4 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 53 +++++++++---------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 75f505f5f..68f33b93e 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -687,8 +687,7 @@ operator=( const SparseMatrix& matrix ) Matrix< Real, Device, Index >::operator=( matrix ); this->columnIndexes = matrix.columnIndexes; this->segments = matrix.segments; - this->indexAllocator = matrix.indexAllocator; - this->realAllocator = matrix.realAllocator; + return *this; } // cross-device copy assignment @@ -797,6 +796,7 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 } //std::cerr << "This matrix = " << std::endl << *this << std::endl; } + return *this; } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index e3fb505d3..34ffd600d 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -360,38 +360,35 @@ template< typename Matrix1, typename Matrix2 > void testCopyAssignment() { { - SCOPED_TRACE("Tri Diagonal Matrix"); - - Matrix1 triDiag1; - setupTriDiagMatrix( triDiag1 ); - checkTriDiagMatrix( triDiag1 ); - - Matrix2 triDiag2; - triDiag2 = triDiag1; - checkTriDiagMatrix( triDiag2 ); + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); } - { - SCOPED_TRACE("Anti Tri Diagonal Matrix"); - - Matrix1 antiTriDiag1; - setupAntiTriDiagMatrix( antiTriDiag1 ); - checkAntiTriDiagMatrix( antiTriDiag1 ); - - Matrix2 antiTriDiag2; - antiTriDiag2 = antiTriDiag1; - checkAntiTriDiagMatrix( antiTriDiag2 ); + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 
antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); } - { - SCOPED_TRACE("Uneven Row Size Matrix"); - Matrix1 unevenRowSize1; - setupUnevenRowSizeMatrix( unevenRowSize1 ); - checkUnevenRowSizeMatrix( unevenRowSize1 ); - - Matrix2 unevenRowSize2; - unevenRowSize2 = unevenRowSize1; - checkUnevenRowSizeMatrix( unevenRowSize2 ); + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); } } -- GitLab From 5eb09d2d627578b16ac5b8e717e4957c6ee6a2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 27 Dec 2019 21:19:22 +0100 Subject: [PATCH 037/179] SpMV benchmark is printing full matrix types. --- src/Benchmarks/SpMV/spmv.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 66f4fb236..26ef145c9 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -159,7 +159,7 @@ benchmarkSpMV( Benchmark& benchmark, { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, - { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) } + { "matrix format", convertToString( getType( hostMatrix ) ) } } )); hostVector.setSize( hostMatrix.getColumns() ); @@ -287,22 +287,18 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, { benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR 
); - benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + //benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); + //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); //// // Segments based sparse matrices - std::cerr << "*********************************" << std::endl; benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); - std::cerr << "*********************************" << std::endl; benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); - std::cerr << "*********************************" << std::endl; - benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); - std::cerr << "*********************************" << std::endl; + //benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); // AdEllpack is broken // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); + //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); } } // namespace Benchmarks -- GitLab From 15308a7caae4c6c0605b1ab1cf2c43fd592f1dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 28 Dec 2019 11:53:37 +0100 Subject: [PATCH 038/179] Added template parameter MatrixType to SparseMatrix and rearrangement of the template parameters. 
--- src/Benchmarks/SpMV/spmv.h | 7 +- src/TNL/Matrices/SparseMatrix.h | 15 +- src/TNL/Matrices/SparseMatrix.hpp | 227 +++++++++++------- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 13 +- .../Matrices/SparseMatrixTest_CSR_segments.h | 48 ++-- .../SparseMatrixTest_Ellpack_segments.h | 48 ++-- .../SparseMatrixTest_SlicedEllpack_segments.h | 49 ++-- 7 files changed, 228 insertions(+), 179 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 26ef145c9..8a1b0614e 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -43,19 +44,19 @@ using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; // Segments based sparse matrix aliases template< typename Real, typename Device, typename Index > -using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Containers::Segments::CSR, Device, Index >; +using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >; template< typename Device, typename Index, typename IndexAllocator > using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > -using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, EllpackSegments, Device, Index >; +using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >; template< typename Device, typename Index, typename IndexAllocator > using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; template< typename Real, typename Device, typename Index > -using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, SlicedEllpackSegments, Device, Index >; +using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >; // Get the name (with extension) of 
input matrix file std::string getMatrixFileName( const String& InputFileName ) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index a5effce93..0d8527daf 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -11,15 +11,18 @@ #pragma once #include +#include #include +#include namespace TNL { namespace Matrices { template< typename Real, - template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments, typename Device = Devices::Host, typename Index = int, + typename MatrixType = GeneralMatrix, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Containers::Segments::CSR, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > @@ -45,6 +48,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); @@ -83,8 +87,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; - template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > - void setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); + template< typename Real2, typename Device2, typename Index2, 
typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > + void setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ); IndexType getNumberOfNonzeroMatrixElements() const; @@ -197,12 +201,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > // cross-device copy assignment template< typename Real2, - template< typename, typename, typename > class Segments2, typename Device2, typename Index2, + typename MatrixType2, + template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > - SparseMatrix& operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ); + SparseMatrix& operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ); void save( File& file ) const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 68f33b93e..b8091d307 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -17,13 +17,14 @@ namespace TNL { namespace Matrices { - template< typename Real, - template< typename, typename, typename > class Segments, +template< typename Real, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) @@ -31,36 +32,39 @@ SparseMatrix( const RealAllocatorType& realAllocator, } template< typename Real, - 
template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix& m ) : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const SparseMatrix&& m ) : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, @@ -70,13 +74,14 @@ SparseMatrix( const IndexType rows, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > String -SparseMatrix< Real, Segments, Device, Index, RealAllocator, 
IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getSerializationType() { return String( "Matrices::SparseMatrix< " ) + @@ -86,27 +91,29 @@ getSerializationType() } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > String -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename RowsCapacitiesVector > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities ) { TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." 
); @@ -126,14 +133,15 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities ) } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Vector > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); @@ -152,81 +160,87 @@ getCompressedRowLengths( Vector& rowLengths ) const } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > Index -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getRowLength( const IndexType row ) const { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getRowLengthFast( const IndexType row ) const { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > Index -SparseMatrix< Real, Segments, Device, Index, 
RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getNonZeroRowLength( const IndexType row ) const { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getNonZeroRowLengthFast( const IndexType row ) const { } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > - template< typename Real2, template< typename, typename, typename > class Segments2, typename Device2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -setLike( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, 
typename IndexAllocator > Index -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getNumberOfNonzeroMatrixElements() const { const auto columns_view = this->columnIndexes.getConstView(); @@ -238,13 +252,14 @@ getNumberOfNonzeroMatrixElements() const } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -253,14 +268,15 @@ reset() } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setElementFast( const IndexType row, const IndexType column, const RealType& value ) @@ -269,13 +285,14 @@ setElementFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) @@ -284,14 +301,15 
@@ setElement( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addElementFast( const IndexType row, const IndexType column, const RealType& value, @@ -301,13 +319,14 @@ addElementFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -367,14 +386,15 @@ addElement( const IndexType row, template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setRowFast( const IndexType row, const IndexType* columnIndexes, const RealType* values, @@ -383,13 +403,14 @@ setRowFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename 
RealAllocator, typename IndexAllocator > bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setRow( const IndexType row, const IndexType* columnIndexes, const RealType* values, @@ -412,14 +433,15 @@ setRow( const IndexType row, template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addRowFast( const IndexType row, const IndexType* columns, const RealType* values, @@ -430,13 +452,14 @@ addRowFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addRow( const IndexType row, const IndexType* columns, const RealType* values, @@ -446,16 +469,16 @@ addRow( const IndexType row, } - template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Real -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getElementFast( const IndexType row, const IndexType 
column ) const { @@ -463,13 +486,14 @@ getElementFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > Real -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { @@ -486,14 +510,15 @@ getElement( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getRowFast( const IndexType row, IndexType* columns, RealType* values ) const @@ -502,15 +527,16 @@ getRowFast( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Vector > __cuda_callable__ typename Vector::RealType -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { @@ -518,15 +544,16 @@ rowVectorProduct( const IndexType row, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, 
typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename InVector, typename OutVector > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, @@ -553,14 +580,15 @@ vectorProduct( const InVector& inVector, } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto columns_view = this->columnIndexes.getConstView(); @@ -576,28 +604,30 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, - template< typename, 
typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Function > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto columns_view = this->columnIndexes.getConstView(); @@ -612,14 +642,15 @@ forRows( IndexType first, IndexType last, Function& function ) const } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Function > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: forAllRows( Function& function ) const { this->forRows( 0, this->getRows(), function ); @@ -633,7 +664,7 @@ forAllRows( Function& function ) const typename IndexAllocator > template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) @@ -649,7 +680,7 @@ template< typename Real, typename IndexAllocator > template< typename Real2, typename Index2 > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, 
Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { @@ -657,14 +688,15 @@ getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, }*/ template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Vector1, typename Vector2 > bool -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: performSORIteration( const Vector1& b, const IndexType row, Vector2& x, @@ -675,13 +707,14 @@ performSORIteration( const Vector1& b, // copy assignment template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: operator=( const SparseMatrix& matrix ) { Matrix< Real, Device, Index >::operator=( matrix ); @@ -692,22 +725,24 @@ operator=( const SparseMatrix& matrix ) // cross-device copy assignment template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > template< typename Real2, - template< typename, typename, typename > class 
Segments2, typename Device2, typename Index2, + typename MatrixType2, + template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >& -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: -operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >& matrix ) +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ) { - using RHSMatrixType = SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2, IndexAllocator2 >; + using RHSMatrixType = SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >; typename RHSMatrixType::RowsCapacitiesType rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); @@ -800,13 +835,14 @@ operator=( const SparseMatrix< Real2, Segments2, Device2, Index2, RealAllocator2 } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: save( File& file ) const { Matrix< RealType, DeviceType, IndexType >::save( file ); @@ -815,13 +851,14 @@ save( File& file ) const } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > 
class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: load( File& file ) { Matrix< RealType, DeviceType, IndexType >::load( file ); @@ -830,39 +867,42 @@ load( File& file ) } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: save( const String& fileName ) const { Object::save( fileName ); } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: load( const String& fileName ) { Object::load( fileName ); } template< typename Real, - template< typename, typename, typename > class Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > void -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) @@ -882,14 +922,15 @@ print( std::ostream& str ) const } template< typename Real, - template< typename, typename, typename > class 
Segments, typename Device, typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index -SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >:: +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return -1; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 34ffd600d..d100bb939 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -30,12 +31,12 @@ using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, Index template< typename Device, typename Index, typename IndexAllocator > using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; -using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >; -using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >; -using E_host = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Host, int >; -using E_cuda = TNL::Matrices::SparseMatrix< int, EllpackSegments, TNL::Devices::Cuda, int >; -using SE_host = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Host, int >; -using SE_cuda = TNL::Matrices::SparseMatrix< int, SlicedEllpackSegments, TNL::Devices::Cuda, int >; +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, 
EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; #ifdef HAVE_GTEST diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 0718e3a69..353dcdbb0 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -29,31 +29,31 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Host, long > + 
TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< int, 
TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::CSR, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, 
TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > #endif >; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 16c22d9ca..b7dc33834 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -40,31 +40,31 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, 
RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, 
TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack > #endif >; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 8597121e4..b2404fe68 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -10,6 +10,7 @@ #include #include +#include #include "SparseMatrixTest.hpp" @@ 
-40,31 +41,31 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< 
float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, 
TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack > #endif >; -- GitLab From c1e68285eeea3f4908d4724a553f721a83e9254d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 28 Dec 2019 11:59:31 +0100 Subject: [PATCH 039/179] Added MatrixType.h. 
--- src/TNL/Matrices/MatrixType.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/TNL/Matrices/MatrixType.h diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h new file mode 100644 index 000000000..c5c8f6375 --- /dev/null +++ b/src/TNL/Matrices/MatrixType.h @@ -0,0 +1,27 @@ +/*************************************************************************** + MatrixType.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + +struct GeneralMatrix +{ + static constexpr bool isSymmetric() { return false; } +}; + +struct SymmetricMatrix +{ + static constexpr bool isSymmetric() { return true; } +}; + + } //namespace Matrices +} //namespace TNL \ No newline at end of file -- GitLab From 03949d09e39871858d3b7b1c64da75394596c1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 28 Dec 2019 13:33:43 +0100 Subject: [PATCH 040/179] Added MatrixView. 
--- src/TNL/Matrices/Matrix.h | 21 +- .../Matrices/{Matrix_impl.h => Matrix.hpp} | 33 +- src/TNL/Matrices/MatrixView.h | 150 +++++++++ src/TNL/Matrices/MatrixView.hpp | 286 ++++++++++++++++++ 4 files changed, 477 insertions(+), 13 deletions(-) rename src/TNL/Matrices/{Matrix_impl.h => Matrix.hpp} (92%) create mode 100644 src/TNL/Matrices/MatrixView.h create mode 100644 src/TNL/Matrices/MatrixView.hpp diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 4a038eb2e..96409c89b 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace TNL { /** @@ -30,13 +31,15 @@ class Matrix : public Object { public: using RealType = Real; - typedef Device DeviceType; - typedef Index IndexType; - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - typedef Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > ValuesVector; + using DeviceType = Device; + using IndexType = Index; + using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; + using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using ValuesVector = Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >; using RealAllocatorType = RealAllocator; + using ViewType = MatrixView< Real, Device, Index >; + using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >; Matrix( const RealAllocatorType& allocator = RealAllocatorType() ); @@ -44,6 +47,10 @@ public: const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); + ViewType getView(); + + 
ConstViewType getConstView() const; + virtual void setDimensions( const IndexType rows, const IndexType columns ); @@ -162,4 +169,4 @@ void MatrixVectorProductCuda( const Matrix& matrix, } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix.hpp similarity index 92% rename from src/TNL/Matrices/Matrix_impl.h rename to src/TNL/Matrices/Matrix.hpp index a93c7a893..91b81ffcf 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix.hpp @@ -43,6 +43,28 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType { } +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +auto +Matrix< Real, Device, Index, RealAllocator >:: +getView() -> ViewType +{ + return ViewType( rows, columns, values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +auto +Matrix< Real, Device, Index, RealAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( rows, columns, values.getConstView() ); +} + template< typename Real, typename Device, typename Index, @@ -105,12 +127,11 @@ template< typename Real, typename RealAllocator > Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const { - IndexType nonZeroElements( 0 ); - for( IndexType i = 0; this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0.0 ) - nonZeroElements++; - - return nonZeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h new file mode 100644 index 000000000..a2fa975cf --- /dev/null +++ 
b/src/TNL/Matrices/MatrixView.h @@ -0,0 +1,150 @@ +/*************************************************************************** + MatrixView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { +/** + * \brief Namespace for matrix formats. + */ +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int > +class MatrixView : public Object +{ +public: + using RealType = Real; + typedef Device DeviceType; + typedef Index IndexType; + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + typedef Containers::VectorView< RealType, DeviceType, IndexType > ValuesView; + + __cuda_callable__ + MatrixView(); + + __cuda_callable__ + MatrixView( const IndexType rows, + const IndexType columns, + const ValuesView& values ); + + __cuda_callable__ + MatrixView( const MatrixView& view ) = default; + + virtual IndexType getRowLength( const IndexType row ) const = 0; + + // TODO: implementation is not parallel + // TODO: it would be nice if padding zeros could be stripped + void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; + + virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + + IndexType getNumberOfMatrixElements() const; + + virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; + + void reset(); + + __cuda_callable__ + IndexType getRows() const; + + __cuda_callable__ + IndexType getColumns() const; + + /**** + * 
TODO: The fast variants of the following methods cannot be virtual. + * If they were, they could not be used in the CUDA kernels. If CUDA allows it + * in the future and it does not slow down, declare them as virtual here. + */ + + virtual bool setElement( const IndexType row, + const IndexType column, + const RealType& value ) = 0; + + virtual bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ) = 0; + + virtual bool setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements ) = 0; + + virtual bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ) = 0; + + virtual Real getElement( const IndexType row, + const IndexType column ) const = 0; + + const ValuesView& getValues() const; + + ValuesView& getValues(); + + // TODO: parallelize and optimize for sparse matrices + template< typename Matrix > + bool operator == ( const Matrix& matrix ) const; + + template< typename Matrix > + bool operator != ( const Matrix& matrix ) const; + + virtual void save( File& file ) const; + + virtual void load( File& file ); + + virtual void print( std::ostream& str ) const; + + + // TODO: method for symmetric matrices, should not be in general Matrix interface + __cuda_callable__ + const IndexType& getNumberOfColors() const; + + // TODO: method for symmetric matrices, should not be in general Matrix interface + void computeColorsVector(Containers::Vector &colorsVector); + + protected: + + IndexType rows, columns; + + ValuesView values; +}; + +template< typename Real, typename Device, typename Index > +std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m ) +{ + m.print( str ); + return str; +} + +/* +template< typename Matrix, + typename InVector, + typename OutVector > +void 
MatrixVectorProductCuda( const Matrix& matrix, + const InVector& inVector, + OutVector& outVector ); +*/ + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp new file mode 100644 index 000000000..bd3d9beae --- /dev/null +++ b/src/TNL/Matrices/MatrixView.hpp @@ -0,0 +1,286 @@ +/*************************************************************************** + MatrixView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView() +: rows( 0 ), + columns( 0 ) +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView( const IndexType rows_, + const IndexType columns_, + const ValuesView& values_ ) + : rows( rows_ ), columns( columns_ ), values( values_ ) +{ +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + getCompressedRowLengths( rowLengths.getView() ); +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + +template< typename Real, + 
typename Device, + typename Index > +Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const +{ + return this->values.getSize(); +} + +template< typename Real, + typename Device, + typename Index > +Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getRows() const +{ + return this->rows; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getColumns() const +{ + return this->columns; +} + +template< typename Real, + typename Device, + typename Index > +const typename MatrixView< Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() const +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index > +typename MatrixView< Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::reset() +{ + this->rows = 0; + this->columns = 0; + this->values.reset(); +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const +{ + if( this->getRows() != matrix.getRows() || + this->getColumns() != matrix.getColumns() ) + return false; + for( IndexType row = 0; row < this->getRows(); row++ ) + for( IndexType column = 0; column < this->getColumns(); column++ ) + if( this->getElement( row, column ) != 
matrix.getElement( row, column ) ) + return false; + return true; +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const +{ + return ! operator == ( matrix ); +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::save( File& file ) const +{ + Object::save( file ); + file.save( &this->rows ); + file.save( &this->columns ); + file << this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::load( File& file ) +{ + Object::load( file ); + file.load( &this->rows ); + file.load( &this->columns ); + file >> this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::print( std::ostream& str ) const +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +const Index& +MatrixView< Real, Device, Index >:: +getNumberOfColors() const +{ + return this->numberOfColors; +} + +template< typename Real, + typename Device, + typename Index > +void +MatrixView< Real, Device, Index >:: +computeColorsVector(Containers::Vector &colorsVector) +{ + for( IndexType i = this->getRows() - 1; i >= 0; i-- ) + { + // init color array + Containers::Vector< Index, Device, Index > usedColors; + usedColors.setSize( this->numberOfColors ); + for( IndexType j = 0; j < this->numberOfColors; j++ ) + usedColors.setElement( j, 0 ); + + // find all colors used in given row + for( IndexType j = i + 1; j < this->getColumns(); j++ ) + if( this->getElement( i, j ) != 0.0 ) + usedColors.setElement( colorsVector.getElement( j ), 1 ); + + // find unused color + bool found = false; + for( IndexType j = 0; j < this->numberOfColors; j++ ) + if( usedColors.getElement( j ) == 0 ) + { + colorsVector.setElement( i, j ); + found = true; + break; + } + if( !found ) + { + 
colorsVector.setElement( i, this->numberOfColors ); + this->numberOfColors++; + } + } +} + +/* +#ifdef HAVE_CUDA +template< typename Matrix, + typename InVector, + typename OutVector > +__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, + const InVector* inVector, + OutVector* outVector, + int gridIdx ) +{ + static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < matrix->getRows() ) + ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); +} +#endif + +template< typename Matrix, + typename InVector, + typename OutVector > +void MatrixVectorProductCuda( const Matrix& matrix, + const InVector& inVector, + OutVector& outVector ) +{ +#ifdef HAVE_CUDA + typedef typename Matrix::IndexType IndexType; + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_this, + kernel_inVector, + kernel_outVector, + gridIdx ); + TNL_CHECK_CUDA_DEVICE; + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); + TNL_CHECK_CUDA_DEVICE; +#endif +} +*/ + +} // namespace Matrices +} // namespace TNL -- GitLab From 256e89f0dd80a852d21432985511887440148b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: 
Sat, 28 Dec 2019 15:17:35 +0100 Subject: [PATCH 041/179] Added SparseMatrixView. --- src/TNL/Matrices/SparseMatrix.h | 1 + src/TNL/Matrices/SparseMatrix.hpp | 2 +- src/TNL/Matrices/SparseMatrixView.h | 196 ++++++++ src/TNL/Matrices/SparseMatrixView.hpp | 647 ++++++++++++++++++++++++++ 4 files changed, 845 insertions(+), 1 deletion(-) create mode 100644 src/TNL/Matrices/SparseMatrixView.h create mode 100644 src/TNL/Matrices/SparseMatrixView.hpp diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 0d8527daf..5f02e9fde 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace TNL { namespace Matrices { diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index b8091d307..08eae92b4 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrix.h - description + SparseMatrix.hpp - description ------------------- begin : Nov 29, 2019 copyright : (C) 2019 by Tomas Oberhuber diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h new file mode 100644 index 000000000..b40d9c0c2 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -0,0 +1,196 @@ +/*************************************************************************** + SparseMatrixView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device = Devices::Host, + typename Index = int, + typename MatrixType = GeneralMatrix, + template< typename Device_, 
typename Index_ > class SegmentsView = Containers::Segments::CSRView > +class SparseMatrixView : public MatrixView< Real, Device, Index > +{ + public: + + using RealType = Real; + template< typename Device_, typename Index_ > + using SegmentsViewTemplate = SegmentsView< Device_, Index_ >; + using SegmentsViewType = SegmentsView< Device, Index >; + using DeviceType = Device; + using IndexType = Index; + using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; + using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView; + using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + + __cuda_callable__ + SparseMatrixView(); + + __cuda_callable__ + SparseMatrixView( const IndexType rows, + const IndexType columns, + ValuesViewType& values, + ColumnsViewType& columnIndexes, + SegmentsViewType& segments ); + + __cuda_callable__ + SparseMatrixView( const SparseMatrixView& m ) = default; + + //__cuda_callable__ + //SparseMatrixView( const SparseMatrixView&& m ) = default; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + IndexType getRowLength( const IndexType row ) const; + + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + + IndexType getNonZeroRowLength( const IndexType row ) const; + + 
__cuda_callable__ + IndexType getNonZeroRowLengthFast( const IndexType row ) const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + __cuda_callable__ + bool setElementFast( const IndexType row, + const IndexType column, + const RealType& value ); + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + __cuda_callable__ + bool addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + + __cuda_callable__ + bool setRowFast( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + + bool setRow( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ); + + + __cuda_callable__ + bool addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + + __cuda_callable__ + RealType getElementFast( const IndexType row, + const IndexType column ) const; + + RealType getElement( const IndexType row, + const IndexType column ) const; + + __cuda_callable__ + void getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + /*** + * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + */ + template< typename InVector, + typename OutVector > + void 
vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& inVectorAddition = 0.0 ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Vector1, typename Vector2 > + bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + void save( File& file ) const; + + void save( const String& fileName ) const; + + void print( std::ostream& str ) const; + + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + ColumnsViewType columnIndexes; + + SegmentsViewType segments; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp new file mode 100644 index 000000000..0c49cd58d --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -0,0 +1,647 @@ +/*************************************************************************** + SparseMatrixView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + typename 
MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +SparseMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +SparseMatrixView( const IndexType rows, + const IndexType columns, + ValuesViewType& values, + ColumnsViewType& columnIndexes, + SegmentsViewType& segments ) + : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +String +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getSerializationType() +{ + return String( "Matrices::SparseMatrix< " ) + + TNL::getSerializationType< RealType >() + ", " + + TNL::getSerializationType< SegmentsView >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +String +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Vector > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + 
return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRowLength( const IndexType row ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRowLengthFast( const IndexType row ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getNonZeroRowLength( const IndexType row ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getNonZeroRowLengthFast( const IndexType row ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( columns_view[ i ] != paddingIndex ); + }; + return Algorithms::Reduction< 
DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +reset() +{ + MatrixView< Real, Device, Index >::reset(); /* the direct base is MatrixView; Matrix is not a base of this class */ + this->columnIndexes.reset(); + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +setElementFast( const IndexType row, + const IndexType column, + const RealType& value ) +{ + return this->addElementFast( row, column, value, 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + return this->addElement( row, column, value, 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT( row >= 0 && row < this->rows && + column >= 0 && column < this->columns, + std::cerr << " row = " << row + << " 
column = " << column + << " this->rows = " << this->rows + << " this->columns = " << this->columns ); + + const IndexType rowSize = this->segments.getSegmentSize( row ); + IndexType col( this->getPaddingIndex() ); + IndexType i; + IndexType globalIdx; + for( i = 0; i < rowSize; i++ ) + { + globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + { + this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); + return true; + } + if( col == this->getPaddingIndex() || col > column ) + break; + } + if( i == rowSize ) + return false; + if( col == this->getPaddingIndex() ) + { + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } + else + { + IndexType j = rowSize - 1; + while( j > i ) + { + const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); + const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); + TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); + TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); + this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); + this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); + j--; + } + + this->columnIndexes.setElement( globalIdx, column ); + this->values.setElement( globalIdx, value ); + return true; + } +} + + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +setRowFast( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + 
template< typename, typename > class SegmentsView > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +setRow( const IndexType row, + const IndexType* columnIndexes, + const RealType* values, + const IndexType elements ) +{ + const IndexType rowLength = this->segments.getSegmentSize( row ); + if( elements > rowLength ) + return false; + + for( IndexType i = 0; i < elements; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] ); + this->values.setElement( globalIdx, values[ i ] ); + } + for( IndexType i = elements; i < rowLength; i++ ) + this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() ); + return true; +} + + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator ) +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator ) +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +Real +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getElementFast( const IndexType row, + const IndexType column ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< 
typename, typename > class SegmentsView > +Real +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getElement( const IndexType row, + const IndexType column ) const +{ + const IndexType rowSize = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowSize; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + const IndexType col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + return this->values.getElement( globalIdx ); + } + return 0.0; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +template< typename InVector, + typename OutVector > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& inVectorAddition ) const +{ + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto 
fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { + const IndexType column = columnIndexesView[ offset ]; + if( column == paddingIndex ) + return 0.0; + return valuesView[ offset ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + IndexType columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, 
this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, localIdx, globalIdx ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +/*template< typename Real, + template< typename, typename > class SegmentsView, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addMatrix( const SparseMatrixView< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename > class SegmentsView, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, typename Index2 > +void +SparseMatrixView< Real, Device, Index, MatrixType, 
SegmentsView >:: +getTransposition( const SparseMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + +}*/ + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +template< typename Vector1, typename Vector2 > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +save( File& file ) const +{ + MatrixView< RealType, DeviceType, IndexType >::save( file ); /* the direct base is MatrixView; it writes rows, columns and values */ + file << this->columnIndexes; + this->segments.save( file ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + const IndexType rowLength = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowLength; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + const IndexType column = this->columnIndexes.getElement( globalIdx ); + if( column == this->getPaddingIndex() ) + break; + str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t"; + } + str << std::endl; + } +} + +template< typename Real, + typename 
Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getPaddingIndex() const +{ + return -1; +} + + } //namespace Matrices +} // namespace TNL -- GitLab From 0f8eb296551dbe58c43132365650beb85fe97897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 28 Dec 2019 17:12:54 +0100 Subject: [PATCH 042/179] ViewType and ConstViewType added to Matrix(View) and SparseMatrix(View). --- src/TNL/Containers/Segments/CSR.h | 2 ++ src/TNL/Containers/Segments/CSRView.h | 2 ++ src/TNL/Containers/Segments/Ellpack.h | 2 ++ src/TNL/Containers/Segments/EllpackView.h | 2 ++ src/TNL/Containers/Segments/SlicedEllpack.h | 2 ++ .../Containers/Segments/SlicedEllpackView.h | 2 ++ src/TNL/Matrices/MatrixView.h | 20 +++++++---- src/TNL/Matrices/MatrixView.hpp | 22 ++++++++++++ src/TNL/Matrices/SparseMatrix.h | 8 +++++ src/TNL/Matrices/SparseMatrix.hpp | 36 +++++++++++++++++++ src/TNL/Matrices/SparseMatrixView.h | 9 +++++ src/TNL/Matrices/SparseMatrixView.hpp | 36 ++++++++++++++++++- 12 files changed, 136 insertions(+), 7 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index add07f1df..f14060559 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -30,6 +30,8 @@ class CSR using IndexType = Index; using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = CSRView< Device_, Index_ >; using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 2f8957970..4917df9e8 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ 
b/src/TNL/Containers/Segments/CSRView.h @@ -29,6 +29,8 @@ class CSRView using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType; using ViewType = CSRView; + template< typename Device_, typename Index_ > + using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; __cuda_callable__ diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index b9b3e63c1..8cb430b6a 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -32,6 +32,8 @@ class Ellpack static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index adbfee629..6c6926be9 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -33,6 +33,8 @@ class EllpackView static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView; //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 9c2e7157f..946c9b642 100644 --- 
a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -32,6 +32,8 @@ class SlicedEllpack static constexpr int getSliceSize() { return SliceSize; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; + template< typename Device_, typename Index_ > + using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; SlicedEllpack(); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 275baacf5..adcf9ef5a 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -31,6 +31,8 @@ class SlicedEllpackView using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >; static constexpr int getSliceSize() { return SliceSize; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + template< typename Device_, typename Index_ > + using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ViewType = SlicedEllpackView; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index a2fa975cf..80fa28acf 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -29,12 +29,14 @@ class MatrixView : public Object { public: using RealType = Real; - typedef Device DeviceType; - typedef Index IndexType; - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - typedef Containers::VectorView< RealType, 
DeviceType, IndexType > ValuesView; + using DeviceType = Device; + using IndexType = Index; + using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; + using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >; + using ViewType = MatrixView< typename std::remove_const< Real >::type, Device, Index >; + using ConstViewType = MatrixView< typename std::add_const< Real >::type, Device, Index >; __cuda_callable__ MatrixView(); @@ -47,6 +49,12 @@ public: __cuda_callable__ MatrixView( const MatrixView& view ) = default; + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + virtual IndexType getRowLength( const IndexType row ) const = 0; // TODO: implementation is not parallel diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index bd3d9beae..55ebc3d67 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -42,6 +42,28 @@ MatrixView( const IndexType rows_, { } +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +auto +MatrixView< Real, Device, Index >:: +getView() ->ViewType +{ + return ViewType( rows, columns, values.getView() ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +auto +MatrixView< Real, Device, Index >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( rows, columns, values.getConstView() ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 5f02e9fde..558cbb5b1 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -34,6 +34,8 @@ class SparseMatrix : public Matrix< Real, Device, 
Index, RealAllocator > template< typename Device_, typename Index_, typename IndexAllocator_ > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; using SegmentsType = Segments< Device, Index, IndexAllocator >; + template< typename Device_, typename Index_ > + using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; @@ -43,6 +45,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; + using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -63,6 +67,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + ViewType getView(); + + ConstViewType getConstView() const; + static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 08eae92b4..8af68bd4d 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -73,6 +73,42 @@ SparseMatrix( const IndexType rows, { } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class 
Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->getColumnsIndexes().getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->getColumnsIndexes().getConstView(), + this->segments.getConstView() ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index b40d9c0c2..847c21dd5 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -37,6 +37,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView; using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -61,6 +64,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > 
//__cuda_callable__ //SparseMatrixView( const SparseMatrixView&& m ) = default; + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 0c49cd58d..ffcba43dc 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -41,7 +41,41 @@ SparseMatrixView( const IndexType rows, ColumnsViewType& columnIndexes, SegmentsViewType& segments ) : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) -{ +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->getColumnsIndexes().getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->getColumnsIndexes().getConstView(), + this->segments.getConstView() ); } template< typename Real, -- GitLab From 8a083c49d0c3a8b6499bf53b4229e9445f5f116b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 28 Dec 2019 22:49:57 +0100 Subject: [PATCH 043/179] Added SparseMatrixRowView. 
--- src/TNL/Containers/Segments/CSR.h | 5 + src/TNL/Containers/Segments/CSR.hpp | 11 +++ src/TNL/Containers/Segments/CSRSegmentView.h | 47 ++++++++++ src/TNL/Containers/Segments/CSRView.h | 5 + src/TNL/Containers/Segments/CSRView.hpp | 10 ++ src/TNL/Containers/Segments/Ellpack.h | 5 + src/TNL/Containers/Segments/Ellpack.hpp | 16 ++++ .../Containers/Segments/EllpackSegmentView.h | 49 ++++++++++ src/TNL/Containers/Segments/EllpackView.h | 5 + src/TNL/Containers/Segments/EllpackView.hpp | 15 +++ src/TNL/Containers/Segments/SlicedEllpack.h | 5 + src/TNL/Containers/Segments/SlicedEllpack.hpp | 21 +++++ .../Containers/Segments/SlicedEllpackView.h | 6 +- .../Containers/Segments/SlicedEllpackView.hpp | 20 ++++ src/TNL/Matrices/SparseMatrix.h | 22 ++++- src/TNL/Matrices/SparseMatrix.hpp | 30 ++++++ src/TNL/Matrices/SparseMatrixRowView.h | 64 +++++++++++++ src/TNL/Matrices/SparseMatrixRowView.hpp | 94 +++++++++++++++++++ src/TNL/Matrices/SparseMatrixView.h | 22 +++-- src/TNL/Matrices/SparseMatrixView.hpp | 26 +++++ 20 files changed, 466 insertions(+), 12 deletions(-) create mode 100644 src/TNL/Containers/Segments/CSRSegmentView.h create mode 100644 src/TNL/Containers/Segments/EllpackSegmentView.h create mode 100644 src/TNL/Matrices/SparseMatrixRowView.h create mode 100644 src/TNL/Matrices/SparseMatrixRowView.hpp diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index f14060559..ddf56b67d 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -14,6 +14,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -34,6 +35,7 @@ class CSR using ViewTemplate = CSRView< Device_, Index_ >; using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; + using SegmentView = CSRSegmentView< IndexType >; CSR(); @@ -83,6 +85,9 @@ class CSR __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + 
__cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 61720869c..16e8a7763 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -158,6 +158,17 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +auto +CSR< Device, Index, IndexAllocator >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); +} + template< typename Device, typename Index, typename IndexAllocator > diff --git a/src/TNL/Containers/Segments/CSRSegmentView.h b/src/TNL/Containers/Segments/CSRSegmentView.h new file mode 100644 index 000000000..3ab5ef9d2 --- /dev/null +++ b/src/TNL/Containers/Segments/CSRSegmentView.h @@ -0,0 +1,47 @@ +/*************************************************************************** + CSRSegmentView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Index > +class CSRSegmentView +{ + public: + + using IndexType = Index; + + __cuda_callable__ + CSRSegmentView( const IndexType offset, const IndexType size ) + : segmentOffset( offset ), segmentSize( size ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIndex ) 
const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." ); + return segmentOffset + localIndex; + }; + + protected: + + IndexType segmentOffset, segmentSize; +}; + } //namespace Segments + } //namespace Containers +} //namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 4917df9e8..3af5798f7 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -13,6 +13,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -32,6 +33,7 @@ class CSRView template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; + using SegmentView = CSRSegmentView< IndexType >; __cuda_callable__ CSRView(); @@ -82,6 +84,9 @@ class CSRView __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + __cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index dd4c434ba..0135c8c68 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -149,6 +149,16 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index > +__cuda_callable__ +auto +CSRView< Device, Index >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); +} + template< typename Device, typename Index > template< typename Function, typename... 
Args > diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 8cb430b6a..0ecae8e7d 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -12,6 +12,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -36,6 +37,7 @@ class Ellpack using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; + using SegmentView = EllpackSegmentView< IndexType >; Ellpack(); @@ -80,6 +82,9 @@ class Ellpack __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + __cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. 
diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 97d30d314..762d314dd 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -216,6 +216,22 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +auto +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + if( RowMajorOrder ) + return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + else + return SegmentView( segmentIdx, this->segmentSize, this->alignedSize ); +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/EllpackSegmentView.h b/src/TNL/Containers/Segments/EllpackSegmentView.h new file mode 100644 index 000000000..7a1638e3f --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackSegmentView.h @@ -0,0 +1,49 @@ +/*************************************************************************** + EllpackSegmentView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Index > +class EllpackSegmentView +{ + public: + + using IndexType = Index; + + __cuda_callable__ + EllpackSegmentView( const IndexType offset, + const IndexType size, + const IndexType step ) + : segmentOffset( offset ), segmentSize( size ), step( step ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType 
localIndex ) const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." ); + return segmentOffset + localIndex * step; + }; + + protected: + + IndexType segmentOffset, segmentSize, step; +}; + } //namespace Segments + } //namespace Containers +} //namespace TNL diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 6c6926be9..185321adb 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -13,6 +13,7 @@ #include #include +#include namespace TNL { @@ -37,6 +38,7 @@ class EllpackView using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView; //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; + using SegmentView = EllpackSegmentView< IndexType >; __cuda_callable__ EllpackView(); @@ -75,6 +77,9 @@ class EllpackView __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + __cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. 
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index d124633ff..914d30a2e 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -160,6 +160,21 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +auto +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + if( RowMajorOrder ) + return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + else + return SegmentView( segmentIdx, this->segmentSize, this->alignedSize ); +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 946c9b642..8c01e8a28 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -12,6 +12,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -35,6 +36,7 @@ class SlicedEllpack template< typename Device_, typename Index_ > using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; + using SegmentView = EllpackSegmentView< IndexType >; SlicedEllpack(); @@ -76,6 +78,9 @@ class SlicedEllpack __cuda_callable__ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + __cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. 
diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index c9c1d8560..1f6479704 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -241,6 +241,27 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +auto +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; + const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + + if( RowMajorOrder ) + return SegmentView( sliceOffset, segmentSize, 1 ); + else + return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index adcf9ef5a..890814b81 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -13,6 +13,7 @@ #include #include +#include namespace TNL { namespace Containers { @@ -35,6 +36,7 @@ class SlicedEllpackView using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ViewType = SlicedEllpackView; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; + using SegmentView = EllpackSegmentView< IndexType >; __cuda_callable__ SlicedEllpackView(); @@ -68,7 +70,6 @@ class SlicedEllpackView __cuda_callable__ IndexType getSize() const; - __cuda_callable__ IndexType getStorageSize() const; @@ -78,6 +79,9 @@ class SlicedEllpackView __cuda_callable__ void 
getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + __cuda_callable__ + SegmentView getSegmentView( const IndexType segmentIdx ) const; + /*** * \brief Go over all segments and for each segment element call * function 'f' with arguments 'args'. The return type of 'f' is bool. diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 66cfce195..45e33b236 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -196,6 +196,26 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +auto +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentView +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; + const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + + if( RowMajorOrder ) + return SegmentView( sliceOffset, segmentSize, 1 ); + else + return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 558cbb5b1..46c02dfb0 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace TNL { namespace Matrices { @@ -36,6 +37,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using SegmentsType = Segments< Device, Index, IndexAllocator >; template< typename Device_, typename Index_ > using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >; + using 
SegmentViewType = typename SegmentsType::ViewType; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; @@ -47,6 +49,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using ColumnsVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using RowView = SparseMatrixRowView< RealType, SegmentViewType >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -104,6 +107,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void reset(); __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + [[deprecated("")]] __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); @@ -112,37 +121,40 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const IndexType column, const RealType& value ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + [[deprecated("")]] bool addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool setRowFast( const IndexType row, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); + [[deprecated("")]] bool setRow( const IndexType row, const IndexType* columnIndexes, const RealType* values, const IndexType 
elements ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool addRowFast( const IndexType row, const IndexType* columns, const RealType* values, const IndexType numberOfElements, const RealType& thisElementMultiplicator = 1.0 ); + [[deprecated("")]] bool addRow( const IndexType row, const IndexType* columns, const RealType* values, @@ -150,14 +162,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; RealType getElement( const IndexType row, const IndexType column ) const; - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ void getRowFast( const IndexType row, IndexType* columns, RealType* values ) const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 8af68bd4d..3f26c95ca 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -303,6 +303,36 @@ reset() } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h new file mode 100644 index 000000000..c6d0468f9 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -0,0 +1,64 @@ +/*************************************************************************** + SparseMatrixRowView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + +template< typename Real, + typename SegmentView > +class SparseMatrixRowView +{ + public: + + using RealType = Real; + using SegmentViewType = SegmentView; + using DeviceType = typename SegmentViewType::DeviceType; + using IndexType = typename SegmentViewType::IndexType; + using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >; + using ColumnIndexesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + + __cuda_callable__ + SparseMatrixRowView( const SegmentView& segmentView, + const ValuesView& values, + const ColumnIndexesView& columnIndexes ); + 
+ __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const IndexType& getColumnIndex( const IndexType localIdx ) const; + + __cuda_callable__ + IndexType& getColumnIndex( const IndexType localIdx ); + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const IndexType column, + const RealType& value ); + protected: + + SegmentView segmentView; + + ValuesView values; + + ColumnIndexesView columnIndexes; +}; + } // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp new file mode 100644 index 000000000..364bb8e2e --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -0,0 +1,94 @@ +/*************************************************************************** + SparseMatrixRowView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Matrices { + +template< typename Real, + typename SegmentView > +__cuda_callable__ +SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView( const SegmentView& segmentView, + const ValuesView& values, + const ColumnIndexesView& columnIndexes ) + : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes ) +{ +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ auto +SparseMatrixRowView< Real, SegmentView >:: +getSize() const -> IndexType +{ + return segmentView.getSize(); +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ auto +SparseMatrixRowView< Real, SegmentView >:: +getColumnIndex( const IndexType 
localIdx ) const -> const IndexType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ auto +SparseMatrixRowView< Real, SegmentView >:: +getColumnIndex( const IndexType localIdx ) -> IndexType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ auto +SparseMatrixRowView< Real, SegmentView >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + return values[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ auto +SparseMatrixRowView< Real, SegmentView >:: +getValue( const IndexType localIdx ) -> RealType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + return values[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename Real, + typename SegmentView > +__cuda_callable__ void +SparseMatrixRowView< Real, SegmentView >:: +setElement( const IndexType localIdx, + const IndexType column, + const RealType& value ) +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." 
); + const IndexType globalIdx = segmentView.getGlobalIndex( localIdx ); + columnIndexes[ globalIdx ] = column; + values[ globalIdx ] = value; +} + + + } // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 847c21dd5..a674ee807 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace TNL { namespace Matrices { @@ -39,7 +40,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - + using RowView = SparseMatrixRowView< RealType, SegmentsViewType >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -92,6 +93,12 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > void reset(); __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + [[deprecated("")]] __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); @@ -100,37 +107,40 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > const IndexType column, const RealType& value ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + [[deprecated("")]] bool addElement( const IndexType row, const IndexType column, const RealType& value, const 
RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool setRowFast( const IndexType row, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); + [[deprecated("")]] bool setRow( const IndexType row, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ bool addRowFast( const IndexType row, const IndexType* columns, const RealType* values, const IndexType numberOfElements, const RealType& thisElementMultiplicator = 1.0 ); + [[deprecated("")]] bool addRow( const IndexType row, const IndexType* columns, const RealType* values, @@ -138,14 +148,14 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; RealType getElement( const IndexType row, const IndexType column ) const; - __cuda_callable__ + [[deprecated("")]] __cuda_callable__ void getRowFast( const IndexType row, IndexType* columns, RealType* values ) const; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index ffcba43dc..3f9743124 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -211,6 +211,32 @@ reset() } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + template< typename Real, typename Device, typename Index, -- GitLab From 02c67f6e1a3cfbd96c3714d8d013c0bc0e4d9c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 29 Dec 2019 18:09:32 +0100 Subject: [PATCH 044/179] Debugging SparseMatrixRowView. --- src/TNL/Containers/Segments/CSR.h | 6 +- src/TNL/Containers/Segments/CSR.hpp | 12 +- src/TNL/Containers/Segments/CSRView.h | 6 +- src/TNL/Containers/Segments/CSRView.hpp | 13 +- src/TNL/Containers/Segments/Ellpack.h | 6 +- src/TNL/Containers/Segments/Ellpack.hpp | 14 +- src/TNL/Containers/Segments/EllpackView.h | 6 +- src/TNL/Containers/Segments/EllpackView.hpp | 17 +- src/TNL/Containers/Segments/SlicedEllpack.h | 6 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 16 +- .../Containers/Segments/SlicedEllpackView.h | 6 +- .../Containers/Segments/SlicedEllpackView.hpp | 17 +- src/TNL/Matrices/Matrix.h | 4 +- src/TNL/Matrices/Matrix.hpp | 4 +- src/TNL/Matrices/MatrixView.h | 10 +- src/TNL/Matrices/MatrixView.hpp | 4 +- src/TNL/Matrices/SparseMatrix.h | 12 +- src/TNL/Matrices/SparseMatrix.hpp | 6 +- src/TNL/Matrices/SparseMatrixRowView.h | 24 +- src/TNL/Matrices/SparseMatrixRowView.hpp | 55 +++-- src/TNL/Matrices/SparseMatrixView.h | 13 +- src/TNL/Matrices/SparseMatrixView.hpp | 12 +- src/UnitTests/Matrices/SparseMatrixTest.hpp | 227 ++++++++++++++++++ 
.../Matrices/SparseMatrixTest_CSR_segments.h | 8 + .../SparseMatrixTest_Ellpack_segments.h | 7 + .../SparseMatrixTest_SlicedEllpack_segments.h | 7 + 26 files changed, 426 insertions(+), 92 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index ddf56b67d..df7cb5686 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -35,7 +35,7 @@ class CSR using ViewTemplate = CSRView< Device_, Index_ >; using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; - using SegmentView = CSRSegmentView< IndexType >; + using SegmentViewType = CSRSegmentView< IndexType >; CSR(); @@ -45,6 +45,8 @@ class CSR CSR( const CSR&& segments ); + static String getSerializationType(); + /** * \brief Set sizes of particular segments. */ @@ -86,7 +88,7 @@ class CSR void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 16e8a7763..9ab2186c3 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -54,6 +54,16 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) } +template< typename Device, + typename Index, + typename IndexAllocator > +String +CSR< Device, Index, IndexAllocator >:: +getSerializationType() +{ + return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index, typename IndexAllocator > @@ -164,7 +174,7 @@ template< typename Device, __cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType 
segmentIdx ) const -> SegmentViewType { return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); } diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 3af5798f7..860a35a0a 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -33,7 +33,7 @@ class CSRView template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; - using SegmentView = CSRSegmentView< IndexType >; + using SegmentViewType = CSRSegmentView< IndexType >; __cuda_callable__ CSRView(); @@ -50,6 +50,8 @@ class CSRView __cuda_callable__ CSRView( const CSRView&& csr_view ); + static String getSerializationType(); + ViewType getView(); ConstViewType getConstView() const; @@ -85,7 +87,7 @@ class CSRView void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 0135c8c68..f4f59370d 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -64,6 +64,15 @@ CSRView( const CSRView&& csr_view ) { } +template< typename Device, + typename Index > +String +CSRView< Device, Index >:: +getSerializationType() +{ + return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index > typename CSRView< Device, Index >::ViewType @@ -154,9 +163,9 @@ template< typename Device, __cuda_callable__ auto CSRView< Device, Index >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType segmentIdx ) const 
-> SegmentViewType { - return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); + return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 0ecae8e7d..f73155335 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -37,7 +37,7 @@ class Ellpack using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; - using SegmentView = EllpackSegmentView< IndexType >; + using SegmentViewType = EllpackSegmentView< IndexType >; Ellpack(); @@ -50,6 +50,8 @@ class Ellpack Ellpack( const Ellpack&& segments ); + static String getSerializationType(); + ViewType getView(); //ConstViewType getConstView() const; @@ -83,7 +85,7 @@ class Ellpack void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 762d314dd..9f7702a6f 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -76,6 +76,18 @@ Ellpack( const Ellpack&& ellpack ) { } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +String +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSerializationType() +{ + return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index, 
typename IndexAllocator, @@ -224,7 +236,7 @@ template< typename Device, __cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( RowMajorOrder ) return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 ); diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 185321adb..682eeeb4a 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -38,7 +38,7 @@ class EllpackView using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView; //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; - using SegmentView = EllpackSegmentView< IndexType >; + using SegmentViewType = EllpackSegmentView< IndexType >; __cuda_callable__ EllpackView(); @@ -52,6 +52,8 @@ class EllpackView __cuda_callable__ EllpackView( const EllpackView&& ellpackView ); + static String getSerializationType(); + ViewType getView(); //ConstViewType getConstView() const; @@ -78,7 +80,7 @@ class EllpackView void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index 914d30a2e..f5dba4f3d 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -63,6 +63,17 @@ EllpackView( const EllpackView&& ellpack ) { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +String +EllpackView< Device, Index, RowMajorOrder, Alignment >:: 
+getSerializationType() +{ + return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index, bool RowMajorOrder, @@ -167,12 +178,12 @@ template< typename Device, __cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( RowMajorOrder ) - return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); else - return SegmentView( segmentIdx, this->segmentSize, this->alignedSize ); + return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 8c01e8a28..1c110b1f1 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -36,7 +36,7 @@ class SlicedEllpack template< typename Device_, typename Index_ > using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; - using SegmentView = EllpackSegmentView< IndexType >; + using SegmentViewType = EllpackSegmentView< IndexType >; SlicedEllpack(); @@ -46,6 +46,8 @@ class SlicedEllpack SlicedEllpack( const SlicedEllpack&& segments ); + static String getSerializationType(); + ViewType getView(); ConstViewType getConstView() const; @@ -79,7 +81,7 @@ class SlicedEllpack void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git 
a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 1f6479704..e2aec924d 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -69,6 +69,18 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack ) { } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getSerializationType() +{ + return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index, typename IndexAllocator, @@ -249,7 +261,7 @@ template< typename Device, __cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; @@ -257,7 +269,7 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentView const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; if( RowMajorOrder ) - return SegmentView( sliceOffset, segmentSize, 1 ); + return SegmentView( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); } diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 890814b81..e87c75229 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -36,7 +36,7 @@ class SlicedEllpackView using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ViewType = SlicedEllpackView; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; - using SegmentView = 
EllpackSegmentView< IndexType >; + using SegmentViewType = EllpackSegmentView< IndexType >; __cuda_callable__ SlicedEllpackView(); @@ -54,6 +54,8 @@ class SlicedEllpackView __cuda_callable__ SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ); + static String getSerializationType(); + ViewType getView(); ConstViewType getConstView() const; @@ -80,7 +82,7 @@ class SlicedEllpackView void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; __cuda_callable__ - SegmentView getSegmentView( const IndexType segmentIdx ) const; + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; /*** * \brief Go over all segments and for each segment element call diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 45e33b236..139a09a15 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -72,6 +72,17 @@ SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ) { } +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSerializationType() +{ + return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + template< typename Device, typename Index, bool RowMajorOrder, @@ -203,7 +214,7 @@ template< typename Device, __cuda_callable__ auto SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: -getSegmentView( const IndexType segmentIdx ) const -> SegmentView +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; @@ -211,9 +222,9 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentView const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; if( RowMajorOrder ) - return SegmentView( 
sliceOffset, segmentSize, 1 ); + return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else - return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); + return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); } template< typename Device, diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 96409c89b..66a686046 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -47,9 +47,9 @@ public: const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); - ViewType getView(); + /*ViewType getView(); - ConstViewType getConstView() const; + ConstViewType getConstView() const;*/ virtual void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 91b81ffcf..3a09d0088 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -43,7 +43,7 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType { } -template< typename Real, +/*template< typename Real, typename Device, typename Index, typename RealAllocator > @@ -63,7 +63,7 @@ Matrix< Real, Device, Index, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( rows, columns, values.getConstView() ); -} +}*/ template< typename Real, typename Device, diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 80fa28acf..18a9fb488 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -49,11 +49,11 @@ public: __cuda_callable__ MatrixView( const MatrixView& view ) = default; - __cuda_callable__ - ViewType getView(); + //__cuda_callable__ + //ViewType getView(); - __cuda_callable__ - ConstViewType getConstView() const; + //__cuda_callable__ + //ConstViewType getConstView() const; virtual IndexType getRowLength( const IndexType row ) const = 0; @@ -65,7 +65,7 @@ public: IndexType 
getNumberOfMatrixElements() const; - virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; + virtual IndexType getNumberOfNonzeroMatrixElements() const; void reset(); diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 55ebc3d67..0473f52b8 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -42,7 +42,7 @@ MatrixView( const IndexType rows_, { } -template< typename Real, +/*template< typename Real, typename Device, typename Index > __cuda_callable__ @@ -62,7 +62,7 @@ MatrixView< Real, Device, Index >:: getConstView() const -> ConstViewType { return ConstViewType( rows, columns, values.getConstView() ); -} +}*/ template< typename Real, typename Device, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 46c02dfb0..8169f89f2 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -36,8 +36,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; using SegmentsType = Segments< Device, Index, IndexAllocator >; template< typename Device_, typename Index_ > - using SegmentsViewTemplate = typename SegmentsType::ViewTemplate< Device_, Index >; - using SegmentViewType = typename SegmentsType::ViewType; + using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >; + using SegmentViewType = typename SegmentsType::SegmentViewType; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; @@ -46,10 +46,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; - using ColumnsVectorType = Containers::Vector< 
IndexType, DeviceType, IndexType, IndexAllocatorType >; + using ValuesViewType = typename ValuesVectorType::ViewType; + using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< RealType, SegmentViewType >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -246,7 +248,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > // TODO: restore it and also in Matrix // protected: - ColumnsVectorType columnIndexes; + ColumnsIndexesVectorType columnIndexes; SegmentsType segments; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 3f26c95ca..c0dd3b9a3 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -87,7 +87,7 @@ getView() -> ViewType return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), - this->getColumnsIndexes().getView(), + this->columnIndexes.getView(), this->segments.getView() ); } @@ -105,7 +105,7 @@ getConstView() const -> ConstViewType return ConstViewType( this->getRows(), this->getColumns(), this->getValues().getConstView(), - this->getColumnsIndexes().getConstView(), + this->columnIndexes.getConstView(), this->segments.getConstView() ); } @@ -299,8 +299,6 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll reset() { Matrix< Real, Device, Index >::reset(); - this->columnIndexes.reset(); - 
} template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h index c6d0468f9..19445f531 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.h +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -13,23 +13,23 @@ namespace TNL { namespace Matrices { -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > class SparseMatrixRowView { public: - using RealType = Real; + using RealType = typename ValuesView::RealType; using SegmentViewType = SegmentView; - using DeviceType = typename SegmentViewType::DeviceType; using IndexType = typename SegmentViewType::IndexType; - using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >; - using ColumnIndexesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ValuesViewType = ValuesView; + using ColumnsIndexesViewType = ColumnsIndexesView; __cuda_callable__ - SparseMatrixRowView( const SegmentView& segmentView, - const ValuesView& values, - const ColumnIndexesView& columnIndexes ); + SparseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes ); __cuda_callable__ IndexType getSize() const; @@ -52,11 +52,11 @@ class SparseMatrixRowView const RealType& value ); protected: - SegmentView segmentView; + SegmentViewType segmentView; - ValuesView values; + ValuesViewType values; - ColumnIndexesView columnIndexes; + ColumnsIndexesViewType columnIndexes; }; } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index 364bb8e2e..70dac874e 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -15,70 +15,77 @@ namespace TNL { namespace Matrices { -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename 
ColumnsIndexesView > __cuda_callable__ -SparseMatrixRowView< Real, SegmentView >:: -SparseMatrixRowView( const SegmentView& segmentView, - const ValuesView& values, - const ColumnIndexesView& columnIndexes ) +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes ) : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes ) { } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ auto -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: getSize() const -> IndexType { return segmentView.getSize(); } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ auto -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: getColumnIndex( const IndexType localIdx ) const -> const IndexType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ auto -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: getColumnIndex( const IndexType localIdx ) -> IndexType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." 
); return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ auto -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: getValue( const IndexType localIdx ) const -> const RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); return values[ segmentView.getGlobalIndex( localIdx ) ]; } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ auto -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: getValue( const IndexType localIdx ) -> RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); return values[ segmentView.getGlobalIndex( localIdx ) ]; } -template< typename Real, - typename SegmentView > +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView > __cuda_callable__ void -SparseMatrixRowView< Real, SegmentView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: setElement( const IndexType localIdx, const IndexType column, const RealType& value ) diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index a674ee807..714692df8 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -32,15 +32,16 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > template< typename Device_, typename Index_ > using SegmentsViewTemplate = SegmentsView< Device_, Index_ >; using SegmentsViewType = SegmentsView< Device, Index >; + using SegmentViewType = typename SegmentsViewType::SegmentViewType; using DeviceType = Device; using IndexType = Index; using RowsCapacitiesView = 
Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView; - using ColumnsViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< RealType, SegmentsViewType >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -55,9 +56,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ SparseMatrixView( const IndexType rows, const IndexType columns, - ValuesViewType& values, - ColumnsViewType& columnIndexes, - SegmentsViewType& segments ); + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes, + const SegmentsViewType& segments ); __cuda_callable__ SparseMatrixView( const SparseMatrixView& m ) = default; @@ -204,7 +205,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > protected: - ColumnsViewType columnIndexes; + ColumnsIndexesViewType columnIndexes; SegmentsViewType segments; }; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 3f9743124..5ac494a9b 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -37,9 +37,9 @@ __cuda_callable__ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: SparseMatrixView( const 
IndexType rows, const IndexType columns, - ValuesViewType& values, - ColumnsViewType& columnIndexes, - SegmentsViewType& segments ) + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes, + const SegmentsViewType& segments ) : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) { } @@ -57,7 +57,7 @@ getView() -> ViewType return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), - this->getColumnsIndexes().getView(), + this->columnIndexes.getView(), this->segments.getView() ); } @@ -89,7 +89,7 @@ getSerializationType() { return String( "Matrices::SparseMatrix< " ) + TNL::getSerializationType< RealType >() + ", " + - TNL::getSerializationType< SegmentsView >() + ", [any_device], " + + TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; } @@ -648,7 +648,7 @@ void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: save( File& file ) const { - Matrix< RealType, DeviceType, IndexType >::save( file ); + MatrixView< RealType, DeviceType, IndexType >::save( file ); file << this->columnIndexes; this->segments.save( file ); } diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 07a60178f..72dfc90e8 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include // Temporary, until test_OperatorEquals doesn't work for all formats. 
@@ -249,6 +250,232 @@ void test_Reset() EXPECT_EQ( m.getColumns(), 0 ); } +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + /*RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + }*/ + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: + val = 1; + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, val++ ); + break; + case 1: + val = 5; + for( IndexType i = 0; i < 
3; i++ ) + row.setElement( i, i, val++ ); + break; + case 2: + val = 8; + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, val++ ); + break; + case 3: + val = 16; + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, val++ ); + break; + case 4: + row.setElement( 0, 0, 18 ); + break; + case 5: + row.setElement( 0, 0, 19 ); + break; + case 6: + row.setElement( 0, 0, 20 ); + break; + case 7: + row.setElement( 0, 0, 21 ); + break; + case 8: + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + case 9: + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); 
+ EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( 
m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + template< typename Matrix > void test_SetElement() { diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 353dcdbb0..e86e34f0a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -94,6 +94,14 @@ TYPED_TEST( CSRMatrixTest, resetTest ) test_Reset< CSRMatrixType >(); } +TYPED_TEST( CSRMatrixTest, getRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_GetRow< CSRMatrixType >(); +} + + TYPED_TEST( CSRMatrixTest, setElementTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index b7dc33834..f597e3199 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -105,6 +105,13 @@ TYPED_TEST( EllpackMatrixTest, resetTest ) test_Reset< EllpackMatrixType >(); } +TYPED_TEST( EllpackMatrixTest, getRowTest ) +{ + using EllpackMatrixType = typename 
TestFixture::EllpackMatrixType; + + test_GetRow< EllpackMatrixType >(); +} + TYPED_TEST( EllpackMatrixTest, setElementTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index b2404fe68..172ed722a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -106,6 +106,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, resetTest ) test_Reset< SlicedEllpackMatrixType >(); } +TYPED_TEST( SlicedEllpackMatrixTest, getRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_GetRow< SlicedEllpackMatrixType >(); +} + TYPED_TEST( SlicedEllpackMatrixTest, setElementTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; -- GitLab From 30a8311f9609c586166c50ae852930ceff6c1944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 29 Dec 2019 22:49:02 +0100 Subject: [PATCH 045/179] Changing SpMV benchmark for testing new sparse matrix implementation. 
--- src/Benchmarks/SpMV/spmv.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 8a1b0614e..8a222c7b5 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -287,15 +287,20 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, bool verboseMR ) { benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); - //benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); + + benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); //// // Segments based sparse matrices - benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); - //benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); + + + // // AdEllpack is broken // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); -- GitLab From e07a01684e5f10209a6660c8cfb4e9b2ebcd20db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 30 Dec 2019 15:20:48 +0100 Subject: [PATCH 046/179] Added boolean compute to stop segment reduction. 
--- src/TNL/Containers/Segments/CSR.hpp | 7 +++--- src/TNL/Containers/Segments/CSRView.hpp | 7 +++--- src/TNL/Containers/Segments/Ellpack.hpp | 13 ++++++----- src/TNL/Containers/Segments/EllpackView.hpp | 13 ++++++----- src/TNL/Containers/Segments/SlicedEllpack.hpp | 14 +++++++----- .../Containers/Segments/SlicedEllpackView.hpp | 22 +++++++++++-------- src/TNL/Matrices/SparseMatrix.hpp | 7 +++--- src/TNL/Matrices/SparseMatrixView.hpp | 5 +++-- .../Containers/Segments/SegmentsTest.hpp | 2 +- 9 files changed, 51 insertions(+), 39 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 9ab2186c3..83da548fc 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -218,14 +218,15 @@ void CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index f4f59370d..b4304ee32 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -204,14 +204,15 @@ void CSRView< Device, Index >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 9f7702a6f..ebc2b360e 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -306,31 +306,32 @@ void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); const IndexType segmentSize = this->segmentSize; auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } else { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); const IndexType storageSize = this->getStorageSize(); const IndexType alignedSize = this->alignedSize; auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = i; const IndexType end = storageSize; RealType aux( zero ); - for( IndexType j = begin; j < end; j += alignedSize ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j += alignedSize ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index f5dba4f3d..dc6bd485d 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -245,31 +245,32 @@ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); const IndexType segmentSize = this->segmentSize; auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } else { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); const IndexType storageSize = this->getStorageSize(); const IndexType alignedSize = this->alignedSize; auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = i; const IndexType end = storageSize; RealType aux( zero ); - for( IndexType j = begin; j < end; j += alignedSize ) - reduction( aux, fetch( i, j, args... ) ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j += alignedSize ) + reduction( aux, fetch( i, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index e2aec924d..ecd32abb2 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes ) const auto sizes_view = sizes.getConstView(); auto slices_view = this->sliceOffsets.getView(); auto slice_segment_size_view = this->sliceSegmentSizes.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType { + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { if( globalIdx < _size ) return sizes_view[ globalIdx ]; return 0; @@ -341,7 +341,7 @@ void SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) @@ -353,8 +353,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); - for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) - reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++ ) + reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); @@ -368,8 +369,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 139a09a15..41b49ed15 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -247,8 +247,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; IndexType localIdx( 0 ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) - if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -262,8 +263,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; IndexType localIdx( 0 ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - if( ! f( segmentIdx, localIdx++, globalIdx, args... 
) ) + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) ) break; }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -291,7 +293,7 @@ void SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) @@ -303,8 +305,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); - for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) - reduction( aux, fetch( segmentIdx, globalIdx, args... ) ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++ ) + reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -318,8 +321,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - reduction( aux, fetch( segmentIdx, globalIdx, args... 
) ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index c0dd3b9a3..691157a9c 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -628,9 +628,10 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ offset ]; - if( column == paddingIndex ) + compute = ( column != paddingIndex ); + if( ! 
compute ) return 0.0; return valuesView[ offset ] * inVectorView[ column ]; }; @@ -658,7 +659,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 5ac494a9b..ce0e7aa18 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -508,9 +508,10 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ offset ]; - if( column == paddingIndex ) + compute = ( column != paddingIndex ); + if( ! 
compute ) return 0.0; return valuesView[ offset ] * inVectorView[ column ]; }; diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 5e74f96b0..6189c2e9a 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments() const auto v_view = v.getConstView(); auto result_view = result.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx ) -> IndexType { + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { return v_view[ globalIdx ]; }; auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { -- GitLab From 9b45cc2e843ec3062224bb5a4873ea97ce3faf07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 30 Dec 2019 17:10:43 +0100 Subject: [PATCH 047/179] Ignoring flag compute in SlicedEllpack -> it makes SlicedEllpack faster. --- src/TNL/Containers/Segments/SlicedEllpack.hpp | 4 ++-- src/TNL/Containers/Segments/SlicedEllpackView.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index ecd32abb2..b58b6a954 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -354,7 +354,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); - for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++ ) + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... 
) ); keeper( segmentIdx, aux ); }; @@ -370,7 +370,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); bool compute( true ); - for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 41b49ed15..82570664f 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -306,7 +306,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); - for( IndexType globalIdx = begin; globalIdx< end && compute; globalIdx++ ) + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; @@ -322,7 +322,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); bool compute( true ); - for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; -- GitLab From 6bb460d87b0a34672d3602828b790a13369547e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 30 Dec 2019 18:28:24 +0100 Subject: [PATCH 048/179] Original implementation of the sparse matrices was moved to Matrices/Legacy. 
--- src/Benchmarks/BLAS/spmv.h | 8 ++++---- .../DistSpMV/tnl-benchmark-distributed-spmv.h | 2 +- .../LinearSolvers/tnl-benchmark-linear-solvers.h | 2 +- .../ODESolvers/tnl-benchmark-ode-solvers.h | 2 +- src/Benchmarks/SpMV/spmv.h | 12 ++++++------ src/Python/pytnl/tnl/SparseMatrix.cpp | 6 +++--- src/Python/pytnl/tnl/SparseMatrix.h | 2 +- src/TNL/Matrices/DistributedMatrix.h | 2 +- src/TNL/Matrices/{ => Legacy}/AdEllpack.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/AdEllpack_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/BiEllpack.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric.h | 0 .../Matrices/{ => Legacy}/BiEllpackSymmetric_impl.h | 0 src/TNL/Matrices/{ => Legacy}/BiEllpack_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/CSR.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/CSR_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/ChunkedEllpack.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/ChunkedEllpack_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/Ellpack.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/EllpackSymmetric.h | 0 .../Matrices/{ => Legacy}/EllpackSymmetricGraph.h | 0 .../{ => Legacy}/EllpackSymmetricGraph_impl.h | 0 .../Matrices/{ => Legacy}/EllpackSymmetric_impl.h | 0 src/TNL/Matrices/{ => Legacy}/Ellpack_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/SlicedEllpack.h | 4 ++-- .../Matrices/{ => Legacy}/SlicedEllpackSymmetric.h | 0 .../{ => Legacy}/SlicedEllpackSymmetricGraph.h | 0 .../{ => Legacy}/SlicedEllpackSymmetricGraph_impl.h | 0 .../{ => Legacy}/SlicedEllpackSymmetric_impl.h | 0 src/TNL/Matrices/{ => Legacy}/SlicedEllpack_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/Sparse.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/SparseRow.h | 2 +- src/TNL/Matrices/{ => Legacy}/SparseRow_impl.h | 2 +- src/TNL/Matrices/{ => Legacy}/Sparse_impl.h | 0 src/TNL/Problems/HeatEquationProblem.h | 2 +- src/TNL/Problems/PDEProblem.h | 2 +- src/TNL/Solvers/Linear/Preconditioners/ILU0.h | 2 +- src/TNL/Solvers/Linear/Preconditioners/ILUT.h | 2 +- src/TNL/Solvers/SolverConfig_impl.h | 3 +-- 
src/UnitTests/Matrices/DistributedMatrixTest.h | 2 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 6 +++--- src/UnitTests/Matrices/SparseMatrixTest.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest.hpp | 6 +++--- src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest_CSR.h | 2 +- .../Matrices/SparseMatrixTest_ChunkedEllpack.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h | 2 +- 48 files changed, 58 insertions(+), 59 deletions(-) rename src/TNL/Matrices/{ => Legacy}/AdEllpack.h (99%) rename src/TNL/Matrices/{ => Legacy}/AdEllpack_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/BiEllpack.h (98%) rename src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric.h (100%) rename src/TNL/Matrices/{ => Legacy}/BiEllpackSymmetric_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/BiEllpack_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/CSR.h (99%) rename src/TNL/Matrices/{ => Legacy}/CSR_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/ChunkedEllpack.h (99%) rename src/TNL/Matrices/{ => Legacy}/ChunkedEllpack_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/Ellpack.h (98%) rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetric.h (100%) rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetricGraph.h (100%) rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetricGraph_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/EllpackSymmetric_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/Ellpack_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpack.h (98%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetric.h (100%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetricGraph.h (100%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetricGraph_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpackSymmetric_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/SlicedEllpack_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/Sparse.h (95%) 
rename src/TNL/Matrices/{ => Legacy}/SparseRow.h (97%) rename src/TNL/Matrices/{ => Legacy}/SparseRow_impl.h (99%) rename src/TNL/Matrices/{ => Legacy}/Sparse_impl.h (100%) diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h index d515d52d7..6685b9f76 100644 --- a/src/Benchmarks/BLAS/spmv.h +++ b/src/Benchmarks/BLAS/spmv.h @@ -15,10 +15,10 @@ #include "../Benchmarks.h" #include -#include -#include -#include -#include +#include +#include +#include +#include namespace TNL { namespace Benchmarks { diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h index aa4b29424..b90b11088 100644 --- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -30,7 +30,7 @@ #include "../Benchmarks.h" #include "ordering.h" -#include +#include using namespace TNL; using namespace TNL::Benchmarks; diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 4aabf39cd..9c58d25b0 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -55,7 +55,7 @@ #define HAVE_CUSOLVER #endif -#include +#include using namespace TNL; using namespace TNL::Benchmarks; diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h index d29b680bc..1e4bc380e 100644 --- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h @@ -36,7 +36,7 @@ #include "Euler.h" #include "Merson.h" -#include +#include using namespace TNL; using namespace TNL::Benchmarks; diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 8a222c7b5..e3a1ae047 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -17,12 +17,12 @@ #include "../Benchmarks.h" 
#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include diff --git a/src/Python/pytnl/tnl/SparseMatrix.cpp b/src/Python/pytnl/tnl/SparseMatrix.cpp index e65849983..fe3ba5aca 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.cpp +++ b/src/Python/pytnl/tnl/SparseMatrix.cpp @@ -3,9 +3,9 @@ #include "SparseMatrix.h" -#include -#include -#include +#include +#include +#include using CSR_host = TNL::Matrices::CSR< double, TNL::Devices::Host, int >; using CSR_cuda = TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >; diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 1a32bd257..03ec5814c 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -5,7 +5,7 @@ namespace py = pybind11; #include #include -#include +#include template< typename Matrix > struct SpecificExports diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h index 76b6ea8c1..05ee28391 100644 --- a/src/TNL/Matrices/DistributedMatrix.h +++ b/src/TNL/Matrices/DistributedMatrix.h @@ -14,7 +14,7 @@ #include -#include +#include #include #include #include diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h similarity index 99% rename from src/TNL/Matrices/AdEllpack.h rename to src/TNL/Matrices/Legacy/AdEllpack.h index f011e6c80..3d2db7b96 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/Legacy/AdEllpack.h @@ -18,7 +18,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -293,4 +293,4 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h similarity index 99% rename from src/TNL/Matrices/AdEllpack_impl.h rename to src/TNL/Matrices/Legacy/AdEllpack_impl.h index b7b97ff93..234e18f94 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ 
b/src/TNL/Matrices/Legacy/AdEllpack_impl.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include #include #include diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h similarity index 98% rename from src/TNL/Matrices/BiEllpack.h rename to src/TNL/Matrices/Legacy/BiEllpack.h index 3ec4b662f..fe3fd9e11 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/Legacy/BiEllpack.h @@ -18,7 +18,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -217,5 +217,5 @@ private: } //namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/BiEllpackSymmetric.h rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric.h diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/BiEllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h similarity index 99% rename from src/TNL/Matrices/BiEllpack_impl.h rename to src/TNL/Matrices/Legacy/BiEllpack_impl.h index c659b758e..36732a39a 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h @@ -11,7 +11,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/Legacy/CSR.h similarity index 99% rename from src/TNL/Matrices/CSR.h rename to src/TNL/Matrices/Legacy/CSR.h index 485176d1d..b68434252 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/Legacy/CSR.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include @@ -272,4 +272,4 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h 
similarity index 99% rename from src/TNL/Matrices/CSR_impl.h rename to src/TNL/Matrices/Legacy/CSR_impl.h index db31d6dcd..08b35f563 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/Legacy/CSR_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/Legacy/ChunkedEllpack.h similarity index 99% rename from src/TNL/Matrices/ChunkedEllpack.h rename to src/TNL/Matrices/Legacy/ChunkedEllpack.h index 9d4220796..a0f55b326 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/Legacy/ChunkedEllpack.h @@ -22,7 +22,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -352,5 +352,5 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h similarity index 99% rename from src/TNL/Matrices/ChunkedEllpack_impl.h rename to src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h index 3b1fd9c8f..406159752 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h similarity index 98% rename from src/TNL/Matrices/Ellpack.h rename to src/TNL/Matrices/Legacy/Ellpack.h index 6536f5f6c..5f6e666f9 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Legacy/Ellpack.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -207,4 +207,4 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/Legacy/EllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetric.h rename to src/TNL/Matrices/Legacy/EllpackSymmetric.h diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h 
b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetricGraph.h rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetricGraph_impl.h rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h similarity index 99% rename from src/TNL/Matrices/Ellpack_impl.h rename to src/TNL/Matrices/Legacy/Ellpack_impl.h index 5ae12f408..656c3f7c2 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h similarity index 98% rename from src/TNL/Matrices/SlicedEllpack.h rename to src/TNL/Matrices/Legacy/SlicedEllpack.h index 7176019d2..b79913b23 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h @@ -21,7 +21,7 @@ #pragma once -#include +#include #include namespace TNL { @@ -235,4 +235,4 @@ public: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetric.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph.h 
rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h similarity index 99% rename from src/TNL/Matrices/SlicedEllpack_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpack_impl.h index 8c629b563..bfba092ff 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h similarity index 95% rename from src/TNL/Matrices/Sparse.h rename to src/TNL/Matrices/Legacy/Sparse.h index c19002443..12c76a6a0 100644 --- a/src/TNL/Matrices/Sparse.h +++ b/src/TNL/Matrices/Legacy/Sparse.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Matrices { @@ -64,5 +64,5 @@ class Sparse : public Matrix< Real, Device, Index > } // namespace Matrices } // namespace TNL -#include +#include #include diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/Legacy/SparseRow.h similarity index 97% rename from src/TNL/Matrices/SparseRow.h rename to src/TNL/Matrices/Legacy/SparseRow.h index f66cd2cea..4787e638a 100644 --- a/src/TNL/Matrices/SparseRow.h +++ b/src/TNL/Matrices/Legacy/SparseRow.h @@ -80,4 +80,4 @@ std::ostream& operator<<( std::ostream& str, const SparseRow< Real, Index >& row } // namespace Matrices } // namespace TNL 
-#include +#include diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/Legacy/SparseRow_impl.h similarity index 99% rename from src/TNL/Matrices/SparseRow_impl.h rename to src/TNL/Matrices/Legacy/SparseRow_impl.h index 60dfd5034..84f8e210e 100644 --- a/src/TNL/Matrices/SparseRow_impl.h +++ b/src/TNL/Matrices/Legacy/SparseRow_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include // Following includes are here to enable usage of std::vector and std::cout. To avoid having to include Device type (HOW would this be done anyway) diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h similarity index 100% rename from src/TNL/Matrices/Sparse_impl.h rename to src/TNL/Matrices/Legacy/Sparse_impl.h diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h index 26df28965..6a8974227 100644 --- a/src/TNL/Problems/HeatEquationProblem.h +++ b/src/TNL/Problems/HeatEquationProblem.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h index 69d95aaee..c81ffdd39 100644 --- a/src/TNL/Problems/PDEProblem.h +++ b/src/TNL/Problems/PDEProblem.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace TNL { diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index d5127fab5..1f2b9f198 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -15,7 +15,7 @@ #include "Preconditioner.h" #include -#include +#include #include #include diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index cce3dc5c4..6a4a4a83b 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -15,7 +15,7 @@ #include "Preconditioner.h" #include -#include +#include namespace TNL { 
namespace Solvers { diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h index 9d3515157..70e7737ee 100644 --- a/src/TNL/Solvers/SolverConfig_impl.h +++ b/src/TNL/Solvers/SolverConfig_impl.h @@ -16,8 +16,7 @@ #include #include #include -#include -#include +#include namespace TNL { namespace Solvers { diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h index 93673a290..a1a9f3eb8 100644 --- a/src/UnitTests/Matrices/DistributedMatrixTest.h +++ b/src/UnitTests/Matrices/DistributedMatrixTest.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include using namespace TNL; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index d100bb939..f00daf1f3 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -8,9 +8,9 @@ /* See Copyright Notice in tnl/Copyright */ -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 5baeb4279..8b1d57566 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include "SparseMatrixTest.hpp" #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 72dfc90e8..c6ff5cbd7 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -15,9 +15,9 @@ #include // Temporary, until test_OperatorEquals doesn't work for all formats. 
-#include -#include -#include +#include +#include +#include #ifdef HAVE_GTEST #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h index 7effb52cd..2169b96df 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include "SparseMatrixTest.hpp" #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h index 33e530be5..c74fa635f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include "SparseMatrixTest.hpp" #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index 3530db46c..c9dfc770f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include "SparseMatrixTest.hpp" #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h index 6909b53a5..45801fa3a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include "SparseMatrixTest.hpp" #include diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 979068e02..26d270a3d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include +#include #include 
"SparseMatrixTest.hpp" #include -- GitLab From 2e95ddda03edf3135757df4217ac338a99e9d92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 2 Jan 2020 14:40:01 +0100 Subject: [PATCH 049/179] Isolating legacy sparse matrix formats unit tests. --- src/TNL/Containers/Segments/SlicedEllpack.h | 1 + src/UnitTests/Matrices/CMakeLists.txt | 49 +- src/UnitTests/Matrices/Legacy/CMakeLists.txt | 72 + .../Matrices/Legacy/SparseMatrixCopyTest.cpp | 11 + .../Matrices/Legacy/SparseMatrixCopyTest.cu | 11 + .../Matrices/Legacy/SparseMatrixCopyTest.h | 573 ++++++ .../Matrices/Legacy/SparseMatrixTest.cpp | 11 + .../Matrices/Legacy/SparseMatrixTest.cu | 11 + .../Matrices/Legacy/SparseMatrixTest.h | 39 + .../Matrices/Legacy/SparseMatrixTest.hpp | 1831 +++++++++++++++++ .../SparseMatrixTest_AdEllpack.cpp | 0 .../SparseMatrixTest_AdEllpack.cu | 0 .../{ => Legacy}/SparseMatrixTest_AdEllpack.h | 2 +- .../SparseMatrixTest_BiEllpack.cpp | 0 .../SparseMatrixTest_BiEllpack.cu | 0 .../{ => Legacy}/SparseMatrixTest_BiEllpack.h | 2 +- .../{ => Legacy}/SparseMatrixTest_CSR.cpp | 0 .../{ => Legacy}/SparseMatrixTest_CSR.cu | 0 .../{ => Legacy}/SparseMatrixTest_CSR.h | 2 +- .../SparseMatrixTest_ChunkedEllpack.cpp | 0 .../SparseMatrixTest_ChunkedEllpack.cu | 0 .../SparseMatrixTest_ChunkedEllpack.h | 2 +- .../{ => Legacy}/SparseMatrixTest_Ellpack.cpp | 0 .../{ => Legacy}/SparseMatrixTest_Ellpack.cu | 0 .../{ => Legacy}/SparseMatrixTest_Ellpack.h | 2 +- .../Legacy/SparseMatrixTest_SlicedEllpack.cpp | 1 + .../Legacy/SparseMatrixTest_SlicedEllpack.cu | 1 + .../SparseMatrixTest_SlicedEllpack.h | 66 +- .../SparseMatrixTest_SlicedEllpack.cpp | 1 - .../SparseMatrixTest_SlicedEllpack.cu | 1 - 30 files changed, 2598 insertions(+), 91 deletions(-) create mode 100644 src/UnitTests/Matrices/Legacy/CMakeLists.txt create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu create mode 100644 
src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.h create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.cpp (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.cu (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_AdEllpack.h (99%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.cpp (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.cu (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_BiEllpack.h (99%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.cpp (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.cu (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_CSR.h (99%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.cpp (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.cu (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_ChunkedEllpack.h (99%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.cpp (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.cu (100%) rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_Ellpack.h (99%) create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp create mode 100644 src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu rename src/UnitTests/Matrices/{ => Legacy}/SparseMatrixTest_SlicedEllpack.h (53%) delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h 
b/src/TNL/Containers/Segments/SlicedEllpack.h index 1c110b1f1..76185bcac 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 9b168bd56..668e272df 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -1,3 +1,5 @@ +ADD_SUBDIRECTORY( Legacy ) + IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) @@ -5,24 +7,6 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) - - CUDA_ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) - - CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) - - CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) - - CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) - - CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack 
${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -46,30 +30,6 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) - - ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) - - ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) - - ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) - - ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) - - ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -95,11 +55,6 
@@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: DenseMatrixTest is not finished #ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt new file mode 100644 index 000000000..9cdfe2784 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt @@ -0,0 +1,72 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS 
${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack 
SparseMatrixTest_ChunkedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ENDIF( BUILD_CUDA ) + + +ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +# Every test here must run the Legacy_-prefixed executable built above. TODO: Uncomment the following when AdEllpack works +#ADD_TEST( Legacy_SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack 
${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +# TODO: DenseMatrixTest is not finished +#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp new file mode 100644 index 000000000..30b8f64ec --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixCopyTest.cpp - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu new file mode 100644 index 000000000..431fe481c --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixCopyTest.cu - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h new file mode 100644 index 000000000..7069fd777 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h @@ -0,0 +1,573 @@ +/*************************************************************************** + SparseMatrixCopyTest.h - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; +using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; +using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; +using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; +using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; +using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/ + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, 
TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; + + +#ifdef HAVE_GTEST +#include + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 12 13 | + * | 14 15 16 17 18 | + * | 19 20 | + * | 21 | + * | 22 | + * | 23 24 25 26 27 | + * \ 28 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); + rowLengths.setElement( 7, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + m.setElement( 6, 0, value++ ); // 6th row + + m.setElement( 7, 0, value++ ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 
8th row + m.setElement( 8, i, value++ ); + + m.setElement( 9, 5, value++ ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 3 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 5 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 6 ); + EXPECT_EQ( m.getElement( 2, 1 ), 7 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 9 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 11 ); + EXPECT_EQ( m.getElement( 3, 4 ), 12 ); + EXPECT_EQ( m.getElement( 3, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 20 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 21 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); + EXPECT_EQ( 
m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); + EXPECT_EQ( m.getElement( 8, 1 ), 24 ); + EXPECT_EQ( m.getElement( 8, 2 ), 25 ); + EXPECT_EQ( m.getElement( 8, 3 ), 26 ); + EXPECT_EQ( m.getElement( 8, 4 ), 27 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 28 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 2 1 \ + * | 5 4 3 | + * | 8 7 6 | + * | 11 10 9 | + * | 14 13 12 | + * | 16 15 | + * \ 17 / + */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, value++ ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 2 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + 
EXPECT_EQ( m.getElement( 1, 5 ), 3 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 6 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 13 ); + EXPECT_EQ( m.getElement( 4, 2 ), 12 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 16 ); + EXPECT_EQ( m.getElement( 5, 1 ), 15 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 17 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 | + * | 12 13 14 | + * | 15 16 | + * \ 17 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, value++ ); +} + +template< typename 
Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 3 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 6 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 9 ); + EXPECT_EQ( m.getElement( 3, 3 ), 10 ); + EXPECT_EQ( m.getElement( 3, 4 ), 11 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 12 ); + EXPECT_EQ( m.getElement( 4, 4 ), 13 ); + EXPECT_EQ( m.getElement( 4, 5 ), 14 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 15 ); + EXPECT_EQ( m.getElement( 5, 5 ), 16 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 17 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); 
+ checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix1, typename Matrix2 > +void testConversion() +{ + + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +TEST( SparseMatrixCopyTest, CSR_HostToHost ) +{ + testCopyAssignment< CSR_host, CSR_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, CSR_HostToCuda ) +{ + testCopyAssignment< CSR_host, CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, CSR_CudaToHost ) +{ + testCopyAssignment< CSR_cuda, 
CSR_host >(); +} + +TEST( SparseMatrixCopyTest, CSR_CudaToCuda ) +{ + testCopyAssignment< CSR_cuda, CSR_cuda >(); +} +#endif + + +TEST( SparseMatrixCopyTest, Ellpack_HostToHost ) +{ + testCopyAssignment< E_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, Ellpack_HostToCuda ) +{ + testCopyAssignment< E_host, E_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_CudaToHost ) +{ + testCopyAssignment< E_cuda, E_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_CudaToCuda ) +{ + testCopyAssignment< E_cuda, E_cuda >(); +} +#endif + + +TEST( SparseMatrixCopyTest, SlicedEllpack_HostToHost ) +{ + testCopyAssignment< SE_host, SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, SlicedEllpack_HostToCuda ) +{ + testCopyAssignment< SE_host, SE_cuda >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToHost ) +{ + testCopyAssignment< SE_cuda, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) +{ + testCopyAssignment< SE_cuda, SE_cuda >(); +} +#endif + + +// test conversion between formats +TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host ) +{ + testConversion< CSR_host, E_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_CSR_host ) +{ + testConversion< E_host, CSR_host >(); +} + +TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_host ) +{ + testConversion< CSR_host, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_host ) +{ + testConversion< SE_host, CSR_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host ) +{ + testConversion< E_host, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host ) +{ + testConversion< SE_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, CSR_to_Ellpack_cuda ) +{ + testConversion< CSR_cuda, E_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_CSR_cuda ) +{ + testConversion< E_cuda, CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda ) +{ + testConversion< CSR_cuda, SE_cuda >(); +} 
+ +TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda ) +{ + testConversion< SE_cuda, CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda ) +{ + testConversion< E_cuda, SE_cuda >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) +{ + testConversion< SE_cuda, E_cuda >(); +} +#endif + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp new file mode 100644 index 000000000..46f6b9bd3 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixTest.cpp - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu new file mode 100644 index 000000000..01c23c193 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixTest.cu - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h new file mode 100644 index 000000000..ed8bec796 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h @@ -0,0 +1,39 @@ +/*************************************************************************** + SparseMatrixTest.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include + +#include "SparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >; +using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; + +using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >; +using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; + +TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host ) +{ + test_PerformSORIteration< CSR_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda ) +{ + // test_PerformSORIteration< CSR_cuda_float >(); // NOTE(review): the only call is disabled, so this test passes without checking anything +} +#endif + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp new file mode 100644 index 000000000..c6ff5cbd7 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp @@ -0,0 +1,1831 @@ +/*************************************************************************** + SparseMatrixTest.hpp - description + ------------------- + begin : Nov 22, 2018 + copyright : (C) 2018 by Tomas Oberhuber et 
al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include +#include + +// Temporary, until test_OperatorEquals works for all formats. +#include +#include +#include + +#ifdef HAVE_GTEST +#include + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() // Placeholder: EXPECT_TRUE on a flag that is always false, so this always fails. +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() // Placeholder: same always-failing body as host_test_GetType. +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_SetDimensions() // Checks that setDimensions( 9, 8 ) is reported back by getRows() / getColumns(). +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); + + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. 
+ RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + + EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 ); + EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 ); + EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 ); + EXPECT_EQ( m.getNonZeroRowLength( 3 ), 2 ); + EXPECT_EQ( m.getNonZeroRowLength( 4 ), 3 ); + EXPECT_EQ( m.getNonZeroRowLength( 5 ), 4 ); + EXPECT_EQ( m.getNonZeroRowLength( 6 ), 5 ); + EXPECT_EQ( m.getNonZeroRowLength( 7 ), 6 ); + EXPECT_EQ( m.getNonZeroRowLength( 8 ), 7 ); + EXPECT_EQ( m.getNonZeroRowLength( 9 ), 8 ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); + + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType 
= typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + } + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.setDimensions( rows, 
cols ); + + m.reset(); + + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + /*RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + }*/ + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: + val = 1; + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, val++ ); + break; + case 1: + val = 5; + 
for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, val++ ); + break; + case 2: + val = 8; + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, val++ ); + break; + case 3: + val = 16; + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, val++ ); + break; + case 4: + row.setElement( 0, 0, 18 ); + break; + case 5: + row.setElement( 0, 0, 19 ); + break; + case 6: + row.setElement( 0, 0, 20 ); + break; + case 7: + row.setElement( 0, 0, 21 ); + break; + case 8: + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + case 9: + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( 
m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 
); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 
4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + 
EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( 
m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 3, 0, value++ ); // 3rd row + + m.setElement( 4, 1, value++ ); // 4th row + + m.setElement( 5, 3, value++ ); // 5th row + + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); + 
EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + +/* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + +/* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 0 0 \ + * | 0 12 15 18 0 | + * | 0 0 21 24 27 | + * | 30 11 12 0 0 | + * | 0 35 14 15 0 | + * \ 0 0 16 41 18 / + */ + + RealType newValue = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.addElement( 0, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.addElement( 1, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.addElement( 2, i, newValue++, 2.0 ); + + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row + m.addElement( 3, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row + m.addElement( 4, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 5th row + m.addElement( 5, i, newValue++, 2.0 ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( 
m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 12 ); + EXPECT_EQ( m.getElement( 1, 2 ), 15 ); + EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 21 ); + EXPECT_EQ( m.getElement( 2, 3 ), 24 ); + EXPECT_EQ( m.getElement( 2, 4 ), 27 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 35 ); + EXPECT_EQ( m.getElement( 4, 2 ), 14 ); + EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 16 ); + EXPECT_EQ( m.getElement( 5, 3 ), 41 ); + EXPECT_EQ( m.getElement( 5, 4 ), 18 ); +} + +template< typename Matrix > +void test_SetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 3x7 sparse matrix: + * + * / 0 0 0 1 1 1 0 \ + * | 2 2 2 0 0 0 0 | + * \ 3 3 3 0 0 0 0 / + */ + + const IndexType rows = 3; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 6 ); + rowLengths.setElement( 1, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) + { + m.setElement( 0, i + 3, value ); + m.setElement( 1, i, value + 1 ); + m.setElement( 2, i, value + 2 ); + } + + RealType 
row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 }; + RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 }; + RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 }; + + RealType row = 0; + IndexType elements = 3; + + m.setRow( row++, colIndexes1, row1, elements ); + m.setRow( row++, colIndexes2, row2, elements ); + m.setRow( row++, colIndexes3, row3, elements ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); + EXPECT_EQ( m.getElement( 1, 1 ), 22 ); + EXPECT_EQ( m.getElement( 1, 2 ), 22 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 0 ); + EXPECT_EQ( m.getElement( 2, 3 ), 33 ); + EXPECT_EQ( m.getElement( 2, 4 ), 33 ); + EXPECT_EQ( m.getElement( 2, 5 ), 33 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + 
rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); + + RealType value_1 = 1; + m_1.setElement( 0, 0, value_1++ ); // 0th row + + m_1.setElement( 1, 1, value_1++ ); // 1st row + m_1.setElement( 1, 3, value_1++ ); + + m_1.setElement( 2, 1, value_1++ ); // 2nd row + + m_1.setElement( 3, 2, value_1++ ); // 3rd row + + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); + + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); + + + m_1.vectorProduct( inVector_1, outVector_1 ); + + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2; + m_2.reset(); + m_2.setDimensions( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2; + rowLengths_2.setSize( m_rows_2 ); + rowLengths_2.setValue( 3 ); + rowLengths_2.setElement( 1, 1 ); + rowLengths_2.setElement( 3, 1 ); + m_2.setCompressedRowLengths( rowLengths_2 ); + + RealType value_2 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, value_2++ ); + + m_2.setElement( 1, 3, value_2++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, value_2++ ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, value_2++ ); + + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); + + VectorType outVector_2; + 
outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + + m_2.vectorProduct( inVector_2, outVector_2 ); + + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3; + m_3.reset(); + m_3.setDimensions( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3; + rowLengths_3.setSize( m_rows_3 ); + rowLengths_3.setValue( 3 ); + m_3.setCompressedRowLengths( rowLengths_3 ); + + RealType value_3 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, value_3++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, value_3++ ); + + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); + + + m_3.vectorProduct( inVector_3, outVector_3 ); + + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + + +/* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | + * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 
34 35 0 0 0 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4; + m_4.reset(); + m_4.setDimensions( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4; + rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 ); + m_4.setCompressedRowLengths( rowLengths_4 ); + + RealType value_4 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, value_4++ ); + + m_4.setElement( 0, 5, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, value_4++ ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, value_4++ ); + + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, value_4++ ); + + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); + + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + + + m_4.vectorProduct( inVector_4, outVector_4 ); + + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + + + /* + * Sets up the following 8x8 sparse matrix: 
+ * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5; + m_5.reset(); + m_5.setDimensions( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5; + rowLengths_5.setSize( m_rows_5 ); + rowLengths_5.setElement(0, 6); + rowLengths_5.setElement(1, 3); + rowLengths_5.setElement(2, 4); + rowLengths_5.setElement(3, 5); + rowLengths_5.setElement(4, 2); + rowLengths_5.setElement(5, 7); + rowLengths_5.setElement(6, 8); + rowLengths_5.setElement(7, 8); + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, value_5++ ); + + m_5.setElement( 0, 4, value_5++ ); // 0th row + m_5.setElement( 0, 5, value_5++ ); + + m_5.setElement( 1, 1, value_5++ ); // 1st row + m_5.setElement( 1, 3, value_5++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, value_5++ ); + + m_5.setElement( 2, 4, value_5++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, value_5++ ); + + m_5.setElement( 4, 1, value_5++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, value_5++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5; + inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( 
j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities( rows ); + //rowLengths.setSize( rows ); + rowsCapacities.setElement(0, 6); + rowsCapacities.setElement(1, 3); + rowsCapacities.setElement(2, 4); + rowsCapacities.setElement(3, 5); + rowsCapacities.setElement(4, 2); + rowsCapacities.setElement(5, 7); + rowsCapacities.setElement(6, 8); + rowsCapacities.setElement(7, 8); + m.setCompressedRowLengths( rowsCapacities ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 5, value++ ); + + m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 2, 4, value++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + m.setElement( 4, 1, value++ ); // 4th row 
+ + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 4x4 
sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +// This test is only for AdEllpack +template< typename Matrix > +void test_OperatorEquals() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) + return; + else + { + using AdELL_host 
= TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; + using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows = 8; + const IndexType m_cols = 8; + + AdELL_host m_host; + + m_host.reset(); + m_host.setDimensions( m_rows, m_cols ); + typename AdELL_host::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setElement(0, 6); + rowLengths.setElement(1, 3); + rowLengths.setElement(2, 4); + rowLengths.setElement(3, 5); + rowLengths.setElement(4, 2); + rowLengths.setElement(5, 7); + rowLengths.setElement(6, 8); + rowLengths.setElement(7, 8); + m_host.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_host.setElement( 0, i, value++ ); + + m_host.setElement( 0, 4, value++ ); // 0th row + m_host.setElement( 0, 5, value++ ); + + m_host.setElement( 1, 1, value++ ); // 1st row + m_host.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_host.setElement( 2, i, value++ ); + + m_host.setElement( 2, 4, value++ ); // 2nd row + + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_host.setElement( 3, i, value++ ); + + m_host.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_host.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_host.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_host.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 + m_host.setElement( i, 7, 1); + + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + EXPECT_EQ( 
m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + EXPECT_EQ( 
m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + AdELL_cuda m_cuda; + + // Copy the host matrix into the cuda matrix + m_cuda = m_host; + + // Reset the host matrix + m_host.reset(); + + // Copy the cuda matrix back into the host matrix + m_host = m_cuda; + + // Check the newly created double-copy host matrix + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + 
EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 
7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + // Try vectorProduct with copied cuda matrix to see if it works correctly. + using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; + + VectorType inVector; + inVector.setSize( m_cols ); + for( IndexType i = 0; i < inVector.getSize(); i++ ) + inVector.setElement( i, 2 ); + + VectorType outVector; + outVector.setSize( m_rows ); + for( IndexType j = 0; j < outVector.getSize(); j++ ) + outVector.setElement( j, 0 ); + + m_cuda.vectorProduct( inVector, outVector ); + + EXPECT_EQ( outVector.getElement( 0 ), 32 ); + EXPECT_EQ( outVector.getElement( 1 ), 28 ); + EXPECT_EQ( outVector.getElement( 2 ), 56 ); + EXPECT_EQ( outVector.getElement( 3 ), 102 ); + EXPECT_EQ( outVector.getElement( 4 ), 32 ); + EXPECT_EQ( outVector.getElement( 5 ), 224 ); + EXPECT_EQ( outVector.getElement( 6 ), 352 ); + EXPECT_EQ( outVector.getElement( 7 ), 520 ); + } +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 0 5 | + * | 6 7 8 0 | + * \ 0 9 10 11 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix savedMatrix; + savedMatrix.reset(); + savedMatrix.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + savedMatrix.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, value++ ); + + savedMatrix.setElement( 1, 1, value++ ); + savedMatrix.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd 
row + savedMatrix.setElement( 3, i, value++ ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix; + loadedMatrix.reset(); + loadedMatrix.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths2; + rowLengths2.setSize( m_rows ); + rowLengths2.setValue( 3 ); + loadedMatrix.setCompressedRowLengths( rowLengths2 ); + + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( 
savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * | 0 8 9 10 | + * \ 0 0 11 12 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + #include + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" + "Row: 1 -> 
Col:3->4\t\n" + "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" + "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" + "Row: 4 -> Col:2->11 Col:3->12\t\n"; + + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h similarity index 99% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h index 2169b96df..d2d268dac 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h @@ -133,4 +133,4 @@ TYPED_TEST( AdEllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h 
b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h similarity index 99% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h index c74fa635f..9dab63c1a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h @@ -142,4 +142,4 @@ TYPED_TEST( BiEllpackMatrixTest, printTest ) } #endif // HAVE_GTEST -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h similarity index 99% rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h index c9dfc770f..3cae12e3a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h @@ -136,4 +136,4 @@ TYPED_TEST( CSRMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu 
similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h similarity index 99% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h index 45801fa3a..a3c049910 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h @@ -144,4 +144,4 @@ TYPED_TEST( ChunkedEllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h similarity index 99% rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h index 26d270a3d..fa6b2027c 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h @@ -136,4 +136,4 @@ TYPED_TEST( EllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp 
b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp new file mode 100644 index 000000000..40e2e94b8 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu new file mode 100644 index 000000000..40e2e94b8 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h similarity index 53% rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h index 00184754c..7f5ad546f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_SlicedEllpack_segments.h - description + SparseMatrixTest_SlicedEllpack.h - description ------------------- begin : Dec 9, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. 
@@ -8,8 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include -#include +#include #include "SparseMatrixTest.hpp" @@ -26,45 +25,38 @@ protected: using SlicedEllpackMatrixType = Matrix; }; -//// -// Row-major format is used for the host system -template< typename Device, typename Index > -using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, true, 32 >; +template< typename Real, typename Device, typename Index > +using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32 >; -//// -// Column-major format is used for GPUs -template< typename Device, typename Index > -using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, false, 32 >; - // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, RowMajorSlicedEllpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, RowMajorSlicedEllpack, TNL::Devices::Host, long > + SlicedEllpackType< int, TNL::Devices::Host, short >, + SlicedEllpackType< long, TNL::Devices::Host, 
short >, + SlicedEllpackType< float, TNL::Devices::Host, short >, + SlicedEllpackType< double, TNL::Devices::Host, short >, + SlicedEllpackType< int, TNL::Devices::Host, int >, + SlicedEllpackType< long, TNL::Devices::Host, int >, + SlicedEllpackType< float, TNL::Devices::Host, int >, + SlicedEllpackType< double, TNL::Devices::Host, int >, + SlicedEllpackType< int, TNL::Devices::Host, long >, + SlicedEllpackType< long, TNL::Devices::Host, long >, + SlicedEllpackType< float, TNL::Devices::Host, long >, + SlicedEllpackType< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, ColumnMajorSlicedEllpack, TNL::Devices::Cuda, long > + ,SlicedEllpackType< int, TNL::Devices::Cuda, short >, + SlicedEllpackType< long, TNL::Devices::Cuda, short >, + SlicedEllpackType< float, TNL::Devices::Cuda, short >, + SlicedEllpackType< double, TNL::Devices::Cuda, short >, + SlicedEllpackType< int, TNL::Devices::Cuda, int >, + SlicedEllpackType< long, 
TNL::Devices::Cuda, int >, + SlicedEllpackType< float, TNL::Devices::Cuda, int >, + SlicedEllpackType< double, TNL::Devices::Cuda, int >, + SlicedEllpackType< int, TNL::Devices::Cuda, long >, + SlicedEllpackType< long, TNL::Devices::Cuda, long >, + SlicedEllpackType< float, TNL::Devices::Cuda, long >, + SlicedEllpackType< double, TNL::Devices::Cuda, long > #endif >; @@ -149,4 +141,4 @@ TYPED_TEST( SlicedEllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp deleted file mode 100644 index a88301100..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "SparseMatrixTest_SlicedEllpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu deleted file mode 100644 index a88301100..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu +++ /dev/null @@ -1 +0,0 @@ -#include "SparseMatrixTest_SlicedEllpack_segments.h" -- GitLab From b4cadcda32bc05fd62f5e5119abdda95e1305f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 2 Jan 2020 17:17:49 +0100 Subject: [PATCH 050/179] Deleting deprecated methods in SparseMatrix and SparseMatrixView. 
--- src/TNL/Matrices/Matrix.h | 18 +- src/TNL/Matrices/MatrixView.h | 11 - src/TNL/Matrices/SparseMatrix.h | 77 +------ src/TNL/Matrices/SparseMatrix.hpp | 211 ------------------ src/TNL/Matrices/SparseMatrixView.h | 59 +---- src/TNL/Matrices/SparseMatrixView.hpp | 175 --------------- src/UnitTests/Matrices/SparseMatrixTest.hpp | 72 ------ .../Matrices/SparseMatrixTest_CSR_segments.h | 7 - .../SparseMatrixTest_Ellpack_segments.h | 7 - .../SparseMatrixTest_SlicedEllpack_segments.h | 7 - 10 files changed, 14 insertions(+), 630 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 66a686046..30031da42 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -56,6 +56,7 @@ public: virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0; + [[deprecated]] virtual IndexType getRowLength( const IndexType row ) const = 0; // TODO: implementation is not parallel @@ -79,12 +80,6 @@ public: __cuda_callable__ IndexType getColumns() const; - /**** - * TODO: The fast variants of the following methods cannot be virtual. - * If they were, they could not be used in the CUDA kernels. If CUDA allows it - * in the future and it does not slow down, declare them as virtual here. 
- */ - virtual bool setElement( const IndexType row, const IndexType column, const RealType& value ) = 0; @@ -94,17 +89,6 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ) = 0; - virtual bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements ) = 0; - - virtual bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ) = 0; - virtual Real getElement( const IndexType row, const IndexType column ) const = 0; diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 18a9fb488..5a3cde478 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -90,17 +90,6 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ) = 0; - virtual bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements ) = 0; - - virtual bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ) = 0; - virtual Real getElement( const IndexType row, const IndexType column ) const = 0; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 8169f89f2..a64c80934 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -91,18 +91,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getRowLength( const IndexType row ) const; - - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; - - IndexType getNonZeroRowLength( const IndexType row ) const; - - __cuda_callable__ - IndexType getNonZeroRowLengthFast( const IndexType row ) const; - - template< 
typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > - void setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ); + [[deprecated]] + virtual IndexType getRowLength( const IndexType row ) const {}; + + template< typename Real_, + typename Device_, + typename Index_, + typename MatrixType_, + template< typename, typename, typename > class Segments_, + typename RealAllocator_, + typename IndexAllocator_ > + void setLike( const SparseMatrix< Real_, Device_, Index_, MatrixType_, Segments_, RealAllocator_, IndexAllocator_ >& matrix ); IndexType getNumberOfNonzeroMatrixElements() const; @@ -114,68 +113,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ RowView getRow( const IndexType& rowIdx ); - [[deprecated("")]] __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); - bool setElement( const IndexType row, const IndexType column, const RealType& value ); - [[deprecated("")]] __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - [[deprecated("")]] bool addElement( const IndexType row, const IndexType column, const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - - [[deprecated("")]] __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - - [[deprecated("")]] - bool setRow( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - - - [[deprecated("")]] __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType 
numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - - [[deprecated("")]] - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - - - [[deprecated("")]] __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + const RealType& thisElementMultiplicator ); RealType getElement( const IndexType row, const IndexType column ) const; - [[deprecated("")]] __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; - template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, @@ -255,8 +204,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > IndexAllocator indexAllocator; RealAllocator realAllocator; - - }; } // namespace Conatiners diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 691157a9c..e43a4fbed 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -195,64 +195,6 @@ getCompressedRowLengths( Vector& rowLengths ) const this->allRowsReduction( fetch, reduce, keep, 0 ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -Index -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getRowLength( const IndexType row ) const -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -Index -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getRowLengthFast( const IndexType row ) const -{ - -} - 
-template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -Index -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getNonZeroRowLength( const IndexType row ) const -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -Index -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getNonZeroRowLengthFast( const IndexType row ) const -{ - -} - template< typename Real, typename Device, typename Index, @@ -331,23 +273,6 @@ getRow( const IndexType& rowIdx ) -> RowView return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) -{ - return this->addElementFast( row, column, value, 0.0 ); -} - template< typename Real, typename Device, typename Index, @@ -364,24 +289,6 @@ setElement( const IndexType row, return this->addElement( row, column, value, 0.0 ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -addElementFast( const IndexType row, - const IndexType column, - 
const RealType& value, - const RealType& thisElementMultiplicator ) -{ - -} - template< typename Real, typename Device, typename Index, @@ -448,107 +355,6 @@ addElement( const IndexType row, } } - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setRowFast( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ) -{ -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setRow( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ) -{ - const IndexType rowLength = this->segments.getSegmentSize( row ); - if( elements > rowLength ) - return false; - - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); - this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] ); - this->values.setElement( globalIdx, values[ i ] ); - } - for( IndexType i = elements; i < rowLength; i++ ) - this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() ); - return true; -} - - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -addRowFast( const IndexType row, - const IndexType* columns, - const RealType* 
values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator ) -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -bool -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator ) -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -Real -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getElementFast( const IndexType row, - const IndexType column ) const -{ - -} - template< typename Real, typename Device, typename Index, @@ -573,23 +379,6 @@ getElement( const IndexType row, return 0.0; } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename, typename > class Segments, - typename RealAllocator, - typename IndexAllocator > -__cuda_callable__ -void -SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const -{ - -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 714692df8..29ea99f75 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -79,16 +79,9 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; + [[deprecated]] IndexType getRowLength( const IndexType row ) 
const; - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; - - IndexType getNonZeroRowLength( const IndexType row ) const; - - __cuda_callable__ - IndexType getNonZeroRowLengthFast( const IndexType row ) const; - IndexType getNumberOfNonzeroMatrixElements() const; void reset(); @@ -99,68 +92,18 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ RowView getRow( const IndexType& rowIdx ); - [[deprecated("")]] __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); - bool setElement( const IndexType row, const IndexType column, const RealType& value ); - [[deprecated("")]] __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - - [[deprecated("")]] bool addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); - - [[deprecated("")]] __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - - [[deprecated("")]] - bool setRow( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ); - - - [[deprecated("")]] __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - - [[deprecated("")]] - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); - - - [[deprecated("")]] __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; - RealType getElement( const IndexType row, const IndexType column ) const; - [[deprecated("")]] 
__cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; - template< typename Vector > __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index ce0e7aa18..408222373 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -142,44 +142,6 @@ getRowLength( const IndexType row ) const } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -Index -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getRowLengthFast( const IndexType row ) const -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -Index -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getNonZeroRowLength( const IndexType row ) const -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -Index -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getNonZeroRowLengthFast( const IndexType row ) const -{ - -} - template< typename Real, typename Device, typename Index, @@ -237,21 +199,6 @@ getRow( const IndexType& rowIdx ) -> RowView return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) -{ - return this->addElementFast( row, column, value, 0.0 ); -} - 
template< typename Real, typename Device, typename Index, @@ -266,22 +213,6 @@ setElement( const IndexType row, return this->addElement( row, column, value, 0.0 ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ - -} - template< typename Real, typename Device, typename Index, @@ -346,97 +277,6 @@ addElement( const IndexType row, } } - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -setRowFast( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ) -{ -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -setRow( const IndexType row, - const IndexType* columnIndexes, - const RealType* values, - const IndexType elements ) -{ - const IndexType rowLength = this->segments.getSegmentSize( row ); - if( elements > rowLength ) - return false; - - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); - this->columnIndexes.setElement( globalIdx, columnIndexes[ i ] ); - this->values.setElement( globalIdx, values[ i ] ); - } - for( IndexType i = elements; i < rowLength; i++ ) - this->columnIndexes.setElement( this->segments.getGlobalIndex( row, i ), this->getPaddingIndex() ); - return true; -} - - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< 
typename, typename > class SegmentsView > -__cuda_callable__ -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator ) -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -bool -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator ) -{ - -} - -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -Real -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getElementFast( const IndexType row, - const IndexType column ) const -{ - -} - template< typename Real, typename Device, typename Index, @@ -459,21 +299,6 @@ getElement( const IndexType row, return 0.0; } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -__cuda_callable__ -void -SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const -{ - -} - template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index c6ff5cbd7..df06d28fc 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -817,78 +817,6 @@ void test_AddElement() EXPECT_EQ( m.getElement( 5, 4 ), 18 ); } -template< typename Matrix > -void test_SetRow() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename 
Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 3x7 sparse matrix: - * - * / 0 0 0 1 1 1 0 \ - * | 2 2 2 0 0 0 0 | - * \ 3 3 3 0 0 0 0 / - */ - - const IndexType rows = 3; - const IndexType cols = 7; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setValue( 6 ); - rowLengths.setElement( 1, 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) - { - m.setElement( 0, i + 3, value ); - m.setElement( 1, i, value + 1 ); - m.setElement( 2, i, value + 2 ); - } - - RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 }; - RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 }; - RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 }; - - RealType row = 0; - IndexType elements = 3; - - m.setRow( row++, colIndexes1, row1, elements ); - m.setRow( row++, colIndexes2, row2, elements ); - m.setRow( row++, colIndexes3, row3, elements ); - - - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 22 ); - EXPECT_EQ( m.getElement( 1, 1 ), 22 ); - EXPECT_EQ( m.getElement( 1, 2 ), 22 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 0 ); - EXPECT_EQ( m.getElement( 2, 3 ), 33 ); - EXPECT_EQ( m.getElement( 2, 4 ), 33 ); - EXPECT_EQ( m.getElement( 2, 5 ), 33 ); - EXPECT_EQ( m.getElement( 2, 6 ), 0 ); -} - 
template< typename Matrix > void test_VectorProduct() { diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index e86e34f0a..34f7b4f70 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -116,13 +116,6 @@ TYPED_TEST( CSRMatrixTest, addElementTest ) test_AddElement< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setRowTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SetRow< CSRMatrixType >(); -} - TYPED_TEST( CSRMatrixTest, vectorProductTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index f597e3199..48cf9afbf 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -126,13 +126,6 @@ TYPED_TEST( EllpackMatrixTest, addElementTest ) test_AddElement< EllpackMatrixType >(); } -TYPED_TEST( EllpackMatrixTest, setRowTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_SetRow< EllpackMatrixType >(); -} - TYPED_TEST( EllpackMatrixTest, vectorProductTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 172ed722a..8eba34a2b 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -127,13 +127,6 @@ TYPED_TEST( SlicedEllpackMatrixTest, addElementTest ) test_AddElement< SlicedEllpackMatrixType >(); } -TYPED_TEST( SlicedEllpackMatrixTest, setRowTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SetRow< 
SlicedEllpackMatrixType >(); -} - TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; -- GitLab From 1276b9e51707185b7f523c02797e5228afa287d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 2 Jan 2020 17:19:16 +0100 Subject: [PATCH 051/179] Deleted getView in Matrix and MatrixView. --- src/TNL/Matrices/Matrix.h | 4 ---- src/TNL/Matrices/Matrix.hpp | 22 ---------------------- src/TNL/Matrices/MatrixView.h | 6 ------ src/TNL/Matrices/MatrixView.hpp | 22 ---------------------- 4 files changed, 54 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 30031da42..8fc8cb5f2 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -47,10 +47,6 @@ public: const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); - /*ViewType getView(); - - ConstViewType getConstView() const;*/ - virtual void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 3a09d0088..9fc5ea620 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -43,28 +43,6 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType { } -/*template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -auto -Matrix< Real, Device, Index, RealAllocator >:: -getView() -> ViewType -{ - return ViewType( rows, columns, values.getView() ); -} - -template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -auto -Matrix< Real, Device, Index, RealAllocator >:: -getConstView() const -> ConstViewType -{ - return ConstViewType( rows, columns, values.getConstView() ); -}*/ - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 5a3cde478..76965e511 100644 --- 
a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -49,12 +49,6 @@ public: __cuda_callable__ MatrixView( const MatrixView& view ) = default; - //__cuda_callable__ - //ViewType getView(); - - //__cuda_callable__ - //ConstViewType getConstView() const; - virtual IndexType getRowLength( const IndexType row ) const = 0; // TODO: implementation is not parallel diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 0473f52b8..bd3d9beae 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -42,28 +42,6 @@ MatrixView( const IndexType rows_, { } -/*template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -auto -MatrixView< Real, Device, Index >:: -getView() ->ViewType -{ - return ViewType( rows, columns, values.getView() ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -auto -MatrixView< Real, Device, Index >:: -getConstView() const -> ConstViewType -{ - return ConstViewType( rows, columns, values.getConstView() ); -}*/ - template< typename Real, typename Device, typename Index > -- GitLab From 1a519384e45caab6676075dabd57d8cb4233f080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 2 Jan 2020 21:40:53 +0100 Subject: [PATCH 052/179] Rewritting assignement operator of sparse matrix to work with any matrix view. 
--- src/TNL/Containers/Segments/CSRView.h | 9 +- src/TNL/Containers/Segments/CSRView.hpp | 9 -- src/TNL/Matrices/SparseMatrix.h | 28 ++++-- src/TNL/Matrices/SparseMatrix.hpp | 116 +++++++++++++++--------- src/TNL/Matrices/SparseMatrixView.h | 6 ++ src/TNL/Matrices/SparseMatrixView.hpp | 32 +++++++ 6 files changed, 133 insertions(+), 67 deletions(-) diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 860a35a0a..f8bcacd0f 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -27,8 +27,8 @@ class CSRView using DeviceType = Device; using IndexType = Index; - using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, IndexType >; - using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, IndexType >::ConstViewType; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >; + using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType; using ViewType = CSRView; template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; @@ -39,10 +39,9 @@ class CSRView CSRView(); __cuda_callable__ - CSRView( const OffsetsView&& offsets ); + CSRView( const OffsetsView& offsets ); - __cuda_callable__ - CSRView( const ConstOffsetsView&& offsets ); + CSRView( const OffsetsView&& offsets ); __cuda_callable__ CSRView( const CSRView& csr_view ); diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index b4304ee32..b0bb35313 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -37,15 +37,6 @@ CSRView( const OffsetsView&& offsets_view ) { } -template< typename Device, - typename Index > -__cuda_callable__ -CSRView< Device, Index >:: -CSRView( const ConstOffsetsView&& offsets_view ) - : offsets( 
offsets_view ) -{ -} - template< typename Device, typename Index > __cuda_callable__ diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index a64c80934..0e3484c10 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -159,27 +159,35 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + template< typename Function > void forAllRows( Function& function ) const; + template< typename Function > + void forAllRows( Function& function ); + template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega = 1.0 ) const; - // copy assignment + /** + * \brief Assignment of exactly the same matrix type. + * @param matrix + * @return + */ SparseMatrix& operator=( const SparseMatrix& matrix ); - // cross-device copy assignment - template< typename Real2, - typename Device2, - typename Index2, - typename MatrixType2, - template< typename, typename, typename > class Segments2, - typename RealAllocator2, - typename IndexAllocator2 > - SparseMatrix& operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ); + /** + * \brief Assignment of any other matrix type. 
+ * @param matrix + * @return + */ + template< typename RHSMatrix > + SparseMatrix& operator=( const RHSMatrix& matrix ); void save( File& file ) const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index e43a4fbed..72184738b 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -11,8 +11,8 @@ #pragma once #include -#include #include +#include namespace TNL { namespace Matrices { @@ -488,7 +488,30 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, localIdx, globalIdx ); + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); @@ -510,6 +533,21 @@ forAllRows( Function& function ) const this->forRows( 0, this->getRows(), function ); } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< 
typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + /*template< typename Real, template< typename, typename, typename > class Segments, typename Device, @@ -585,56 +623,52 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > - template< typename Real2, - typename Device2, - typename Index2, - typename MatrixType2, - template< typename, typename, typename > class Segments2, - typename RealAllocator2, - typename IndexAllocator2 > + template< typename RHSMatrix > SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ) +operator=( const RHSMatrix& matrix ) { - using RHSMatrixType = SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >; - typename RHSMatrixType::RowsCapacitiesType rowLengths; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; + + typename RHSMatrix::RowsCapacitiesType rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); this->setCompressedRowLengths( rowLengths ); - // TODO: Replace this with SparseMatrixView - const auto matrix_columns_view = matrix.columnIndexes.getConstView(); - const auto 
matrix_values_view = matrix.values.getConstView(); + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); - auto this_columns_view = this->columnIndexes.getView(); - auto this_values_view = this->values.getView(); - this_columns_view = paddingIndex; + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + columns_view = paddingIndex; - if( std::is_same< Device, Device2 >::value ) + if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto this_segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { - const IndexType column = matrix_columns_view[ globalIdx ]; - if( column != paddingIndex ) + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) { - const RealType value = matrix_values_view[ globalIdx ]; - IndexType thisGlobalIdx = this_segments_view.getGlobalIndex( rowIdx, localIdx ); - this_columns_view[ thisGlobalIdx ] = column; - this_values_view[ thisGlobalIdx ] = value; + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); + columns_view[ thisGlobalIdx ] = columnIndex; + values_view[ thisGlobalIdx ] = value; } }; matrix.forAllRows( f ); } else { - //std::cerr << "Matrix = " << std::endl << matrix << std::endl; const IndexType maxRowLength = max( rowLengths ); - const IndexType bufferRowsCount( 8 ); + const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; - Containers::Vector< Real2, Device2, Index2, RealAllocator2 > matrixValuesBuffer( bufferSize ); - Containers::Vector< Index2, Device2, Index2, IndexAllocator2 > matrixColumnsBuffer( bufferSize ); - 
Containers::Vector< RealType, DeviceType, IndexType, RealAllocator > thisValuesBuffer( bufferSize ); - Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocator > thisColumnsBuffer( bufferSize ); + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); @@ -650,20 +684,16 @@ operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, R //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { - const IndexType column = matrix_columns_view[ globalIdx ]; - if( column != paddingIndex ) + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; - //printf( ">>>RowIdx = %d GlobalIdx = %d column = %d bufferIdx = %d \n", rowIdx, globalIdx, column, bufferIdx ); - matrixValuesBuffer_view[ bufferIdx ] = matrix_values_view[ globalIdx ]; - matrixColumnsBuffer_view[ bufferIdx ] = column; + matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; } }; matrix.forRows( baseRow, lastRow, f1 ); - //std::cerr << "Values = " << matrixValuesBuffer_view << std::endl; - //std::cerr << "Columns = " << matrixColumnsBuffer_view << std::endl; //// // Copy the 
source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; @@ -671,13 +701,13 @@ operator=( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, R //// // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable { + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; if( column != paddingIndex ) { - this_columns_view[ globalIdx ] = column; - this_values_view[ globalIdx ] = thisValuesBuffer_view[ bufferIdx ]; + columnIndex = column; + value = thisValuesBuffer_view[ bufferIdx ]; } }; this->forRows( baseRow, lastRow, f2 ); diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 29ea99f75..1f587acf3 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -128,9 +128,15 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > template< typename Function > void forRows( IndexType first, IndexType last, Function& function ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + template< typename Function > void forAllRows( Function& function ) const; + template< typename Function > + void forAllRows( Function& function ); + template< typename Vector1, typename Vector2 > bool performSORIteration( const Vector1& b, const IndexType row, diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 408222373..df136388e 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -402,7 +402,26 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, 
last, f ); +} +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, localIdx, globalIdx ); + return true; + }; + this->segments.forSegments( first, last, f ); } template< typename Real, @@ -418,6 +437,19 @@ forAllRows( Function& function ) const this->forRows( 0, this->getRows(), function ); } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + /*template< typename Real, template< typename, typename > class SegmentsView, typename Device, -- GitLab From 9895f08102c06c1ca46be39240a55258742349b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:03:10 +0100 Subject: [PATCH 053/179] Fixed legacy sparse matrix formats unit tests. 
--- src/TNL/Matrices/{Dense_impl.h => Dense.hpp} | 0 src/UnitTests/Matrices/Legacy/CMakeLists.txt | 14 +++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) rename src/TNL/Matrices/{Dense_impl.h => Dense.hpp} (100%) diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense.hpp similarity index 100% rename from src/TNL/Matrices/Dense_impl.h rename to src/TNL/Matrices/Dense.hpp diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt index 9cdfe2784..4320b6c1f 100644 --- a/src/UnitTests/Matrices/Legacy/CMakeLists.txt +++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt @@ -59,14 +59,14 @@ ELSE( BUILD_CUDA ) ENDIF( BUILD_CUDA ) -ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( 
Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: DenseMatrixTest is not finished #ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -- GitLab From bc185a64f9cdfd383156a7ed06ab19789c1dfab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:03:54 +0100 Subject: [PATCH 054/179] Added template parameters to dense matrix: RowMajorOrder and RealAllocator. 
--- src/TNL/Matrices/Dense.h | 37 ++-- src/TNL/Matrices/Dense.hpp | 284 +++++++++++++++++++---------- src/TNL/Matrices/DistributedSpMV.h | 3 +- src/TNL/Matrices/Matrix.h | 4 +- src/TNL/Matrices/Matrix.hpp | 7 +- src/TNL/Matrices/SparseMatrix.h | 10 +- src/TNL/Matrices/SparseMatrix.hpp | 4 +- 7 files changed, 215 insertions(+), 134 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index c46992723..3fc6d8908 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include @@ -23,7 +24,9 @@ class DenseDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, - typename Index = int > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { private: @@ -32,17 +35,17 @@ private: using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Dense; + //template< typename Real2, typename Device2, typename Index2 > + //friend class Dense; public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef DenseRow< Real, Index > MatrixRow; + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + using BaseType = Matrix< Real, Device, Index >; + using MatrixRow = DenseRow< Real, Index >; template< typename _Real = Real, typename _Device = Device, @@ -58,23 +61,17 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename Matrix > + void setLike( const Matrix& matrix ); /**** * This method is only for the compatibility with the sparse matrices. */ void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - /**** - * Returns maximal number of the nonzero matrix elements that can be stored - * in a given row. 
- */ + [[deprecated]] IndexType getRowLength( const IndexType row ) const; - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; - IndexType getMaxRowLength() const; IndexType getNumberOfMatrixElements() const; @@ -220,4 +217,4 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 246bd09ed..70e5018dd 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -19,15 +19,19 @@ namespace Matrices { template< typename Real, typename Device, - typename Index > -Dense< Real, Device, Index >::Dense() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense() { } template< typename Real, typename Device, - typename Index > -String Dense< Real, Device, Index >::getSerializationType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationType() { return String( "Matrices::Dense< " ) + getType< RealType >() + ", " + @@ -37,16 +41,20 @@ String Dense< Real, Device, Index >::getSerializationType() template< typename Real, typename Device, - typename Index > -String Dense< Real, Device, Index >::getSerializationTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationTypeVirtual() const { return this->getSerializationType(); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setDimensions( const IndexType rows, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); @@ -56,59 +64,71 @@ void Dense< Real, Device, 
Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, - typename Index > - template< typename Real2, - typename Device2, - typename Index2 > -void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Matrix_ > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setLike( const Matrix_& matrix ) { - this->setDimensions( matrix.getRows(), matrix.getColumns() ); + Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( const IndexType row ) const { return this->getColumns(); } -template< typename Real, +/*template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLengthFast( const IndexType row ) const { return this->getColumns(); -} +}*/ template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getMaxRowLength() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator 
>::getMaxRowLength() const { return this->getColumns(); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getNumberOfMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfMatrixElements() const { return this->getRows() * this->getColumns(); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const { IndexType nonzeroElements( 0 ); for( IndexType row = 0; row < this->getRows(); row++ ) @@ -120,8 +140,10 @@ Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::reset() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset() { Matrix< Real, Device, Index >::reset(); this->values.reset(); @@ -129,8 +151,10 @@ void Dense< Real, Device, Index >::reset() template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setValue( const Real& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value ) { this->values.setValue( value ); } @@ -138,9 +162,11 @@ void Dense< Real, Device, Index >::setValue( const Real& value ) template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -Real& Dense< Real, Device, Index >::operator()( const IndexType row, +Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const 
IndexType row, const IndexType column ) { TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); @@ -153,9 +179,11 @@ Real& Dense< Real, Device, Index >::operator()( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -const Real& Dense< Real, Device, Index >::operator()( const IndexType row, +const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, const IndexType column ) const { TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); @@ -169,9 +197,11 @@ const Real& Dense< Real, Device, Index >::operator()( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -bool Dense< Real, Device, Index >::setElementFast( const IndexType row, +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElementFast( const IndexType row, const IndexType column, const RealType& value ) { @@ -186,8 +216,10 @@ bool Dense< Real, Device, Index >::setElementFast( const IndexType row, template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::setElement( const IndexType row, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( const IndexType row, const IndexType column, const RealType& value ) { @@ -198,9 +230,11 @@ bool Dense< Real, Device, Index >::setElement( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -bool Dense< Real, Device, Index >::addElementFast( const IndexType row, +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElementFast( const IndexType row, const IndexType column, const RealType& value, const RealType& 
thisElementMultiplicator ) @@ -221,8 +255,10 @@ bool Dense< Real, Device, Index >::addElementFast( const IndexType row, template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::addElement( const IndexType row, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ) @@ -240,9 +276,11 @@ bool Dense< Real, Device, Index >::addElement( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -bool Dense< Real, Device, Index >::setRowFast( const IndexType row, +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRowFast( const IndexType row, const IndexType* columns, const RealType* values, const IndexType elements ) @@ -257,8 +295,10 @@ bool Dense< Real, Device, Index >::setRowFast( const IndexType row, template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::setRow( const IndexType row, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRow( const IndexType row, const IndexType* columns, const RealType* values, const IndexType elements ) @@ -273,9 +313,11 @@ bool Dense< Real, Device, Index >::setRow( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -bool Dense< Real, Device, Index >::addRowFast( const IndexType row, +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRowFast( const IndexType row, const IndexType* columns, const RealType* values, const IndexType elements, @@ -292,8 +334,10 @@ bool Dense< Real, Device, Index >::addRowFast( const IndexType 
row, template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::addRow( const IndexType row, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRow( const IndexType row, const IndexType* columns, const RealType* values, const IndexType elements, @@ -311,9 +355,11 @@ bool Dense< Real, Device, Index >::addRow( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row, +const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementFast( const IndexType row, const IndexType column ) const { TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); @@ -326,8 +372,10 @@ const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row, template< typename Real, typename Device, - typename Index > -Real Dense< Real, Device, Index >::getElement( const IndexType row, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row, const IndexType column ) const { return this->values.getElement( this->getElementIndex( row, column ) ); @@ -335,9 +383,11 @@ Real Dense< Real, Device, Index >::getElement( const IndexType row, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -void Dense< Real, Device, Index >::getRowFast( const IndexType row, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowFast( const IndexType row, IndexType* columns, RealType* values ) const { @@ -350,10 +400,12 @@ void Dense< Real, Device, Index >::getRowFast( const IndexType row, template< typename Real, typename Device, - typename Index > + typename 
Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -typename Dense< Real, Device, Index >::MatrixRow -Dense< Real, Device, Index >:: +typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) { if( std::is_same< Device, Devices::Host >::value ) @@ -368,10 +420,12 @@ getRow( const IndexType rowIndex ) template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -const typename Dense< Real, Device, Index >::MatrixRow -Dense< Real, Device, Index >:: +const typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) const { if( std::is_same< Device, Devices::Host >::value ) @@ -386,10 +440,12 @@ getRow( const IndexType rowIndex ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const IndexType row, +typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, const Vector& vector ) const { RealType sum( 0.0 ); @@ -400,10 +456,12 @@ typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename InVector, typename OutVector > -void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), @@ -418,9 +476,11 @@ 
void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix > -void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Matrix& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -440,6 +500,8 @@ void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix, #ifdef HAVE_CUDA template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename Matrix1, typename Matrix2, int tileDim, @@ -538,9 +600,11 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix1, typename Matrix2, int tileDim > -void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1, const Matrix2& matrix2, const RealType& matrix1Multiplicator, const RealType& matrix2Multiplicator ) @@ -628,6 +692,8 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, template< typename Real, typename Index, typename Matrix, + bool RowMajorOrder, + typename RealAllocator, int tileDim, int tileRowBlockSize > __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, @@ -696,6 +762,8 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename Matrix, int tileDim, int tileRowBlockSize > @@ -776,9 +844,11 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, template< 
typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix, int tileDim > -void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Matrix& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getColumns() == matrix.getRows() && @@ -867,9 +937,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector1, typename Vector2 > -void Dense< Real, Device, Index >::performSORIteration( const Vector1& b, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -889,9 +961,11 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector1& b, // copy assignment template< typename Real, typename Device, - typename Index > -Dense< Real, Device, Index >& -Dense< Real, Device, Index >::operator=( const Dense& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -901,10 +975,12 @@ Dense< Real, Device, Index >::operator=( const Dense& matrix ) // cross-device copy assignment template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Device2, typename Index2, typename > -Dense< Real, Device, Index >& -Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -919,40 +995,50 @@ Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::save( const String& fileName ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const { Object::save( fileName ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::load( const String& fileName ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::save( File& file ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::load( File& file ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); 
row++ ) { @@ -965,9 +1051,11 @@ void Dense< Real, Device, Index >::print( std::ostream& str ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -Index Dense< Real, Device, Index >::getElementIndex( const IndexType row, +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, const IndexType column ) const { TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || @@ -988,9 +1076,11 @@ class DenseDeviceDependentCode< Devices::Host > template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename InVector, typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index >& matrix, + static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { @@ -1011,9 +1101,11 @@ class DenseDeviceDependentCode< Devices::Cuda > template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename InVector, typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index >& matrix, + static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index b2abd13c5..e5b2e9008 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -19,6 +19,7 @@ #include #include // std::pair #include // std::numeric_limits +#include #include #include #include @@ -235,7 +236,7 @@ public: protected: // communication pattern - Matrices::Dense< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds; + Matrices::Dense< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds; // span of rows with only 
block-diagonal entries std::pair< IndexType, IndexType > localOnlySpan; diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 8fc8cb5f2..a9b458d7b 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -61,8 +61,8 @@ public: virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; - template< typename Real2, typename Device2, typename Index2, typename RealAllocator2 > - void setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix ); + template< typename Matrix_ > + void setLike( const Matrix_& matrix ); IndexType getNumberOfMatrixElements() const; diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 9fc5ea620..29226cb00 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -81,11 +81,8 @@ template< typename Real, typename Device, typename Index, typename RealAllocator > - template< typename Real2, - typename Device2, - typename Index2, - typename RealAllocator2 > -void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix< Real2, Device2, Index2, RealAllocator2 >& matrix ) + template< typename Matrix_ > +void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix ) { setDimensions( matrix.getRows(), matrix.getColumns() ); } diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 0e3484c10..8f96af169 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -94,14 +94,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > [[deprecated]] virtual IndexType getRowLength( const IndexType row ) const {}; - template< typename Real_, - typename Device_, - typename Index_, - typename MatrixType_, - template< typename, typename, typename > class Segments_, - typename RealAllocator_, - typename IndexAllocator_ > - void setLike( const SparseMatrix< Real_, Device_, Index_, MatrixType_, Segments_, RealAllocator_, IndexAllocator_ >& 
matrix ); + template< typename Matrix > + void setLike( const Matrix& matrix ); IndexType getNumberOfNonzeroMatrixElements() const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 72184738b..6c0655ce0 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -202,10 +202,10 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > - template< typename Real2, typename Device2, typename Index2, typename MatrixType2, template< typename, typename, typename > class Segments2, typename RealAllocator2, typename IndexAllocator2 > + template< typename Matrix_ > void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -setLike( const SparseMatrix< Real2, Device2, Index2, MatrixType2, Segments2, RealAllocator2, IndexAllocator2 >& matrix ) +setLike( const Matrix_& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } -- GitLab From 25d43763f10fe3f6f19851daa152f035e2a08439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:07:16 +0100 Subject: [PATCH 055/179] Reimplementation of Matrices::Dense::getNumberOfNonzeroMatrixElements. 
--- src/TNL/Matrices/Dense.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 70e5018dd..7e6dec9ce 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -130,12 +130,11 @@ template< typename Real, typename RealAllocator > Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const { - IndexType nonzeroElements( 0 ); - for( IndexType row = 0; row < this->getRows(); row++ ) - for( IndexType column = 0; column < this->getColumns(); column++ ) - if( this->getElement( row, column ) != 0 ) - nonzeroElements++; - return nonzeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, -- GitLab From 8758a8e6a550ee9c24dcb2d8258aac6d0adcea5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:19:12 +0100 Subject: [PATCH 056/179] Deleting of deprecated methods in dense matrix. 
--- src/TNL/Matrices/Dense.h | 48 ------- src/TNL/Matrices/Dense.hpp | 173 +---------------------- src/TNL/Matrices/DistributedSpMV.h | 4 +- src/UnitTests/Matrices/DenseMatrixTest.h | 10 +- 4 files changed, 14 insertions(+), 221 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 3fc6d8908..a2c6a7eda 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -90,66 +90,18 @@ public: const Real& operator()( const IndexType row, const IndexType column ) const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); - bool setElement( const IndexType row, const IndexType column, const RealType& value ); - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); - bool addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); - - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); - - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); - - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); - - __cuda_callable__ - const Real& getElementFast( const IndexType row, - const IndexType column ) const; - Real getElement( const IndexType row, const IndexType column ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; - - /*void getRow( const IndexType row, - IndexType* columns, - 
RealType* values ) const;*/ - __cuda_callable__ MatrixRow getRow( const IndexType rowIndex ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 7e6dec9ce..190052390 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -145,7 +145,6 @@ template< typename Real, void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); } template< typename Real, @@ -155,10 +154,9 @@ template< typename Real, typename RealAllocator > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value ) { - this->values.setValue( value ); + this->values = value; } - template< typename Real, typename Device, typename Index, @@ -193,26 +191,6 @@ const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator return this->values.operator[]( this->getElementIndex( row, column ) ); } - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) -{ - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." 
); - - this->values.operator[]( this->getElementIndex( row, column ) ) = value; - return true; -} - template< typename Real, typename Device, typename Index, @@ -226,32 +204,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( con return true; } - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - const IndexType elementIndex = this->getElementIndex( row, column ); - if( thisElementMultiplicator == 1.0 ) - this->values.operator[]( elementIndex ) += value; - else - this->values.operator[]( elementIndex ) = - thisElementMultiplicator * this->values.operator[]( elementIndex ) + value; - return true; -} - template< typename Real, typename Device, typename Index, @@ -272,103 +224,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( con return true; } - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) -{ - TNL_ASSERT( elements <= this->getColumns(), - std::cerr << " elements = " << elements - << " this->columns = " << this->getColumns() ); - for( IndexType i = 0; i < elements; i++ ) - this->setElementFast( row, columns[ i ], values[ i ] ); - return true; -} - -template< typename Real, - typename 
Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) -{ - TNL_ASSERT( elements <= this->getColumns(), - std::cerr << " elements = " << elements - << " this->columns = " << this->getColumns() ); - for( IndexType i = 0; i < elements; i++ ) - this->setElement( row, columns[ i ], values[ i ] ); - return true; -} - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - for( IndexType i = 0; i < elements; i++ ) - this->setElementFast( row, columns[ i ], - thisRowMultiplicator * this->getElementFast( row, columns[ i ] ) + values[ i ] ); - return true; -} - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - for( IndexType i = 0; i < elements; i++ ) - this->setElement( row, columns[ i ], - thisRowMultiplicator * this->getElement( row, columns[ i ] ) + values[ i ] ); - return true; -} - - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -const Real& Dense< Real, Device, 
Index, RowMajorOrder, RealAllocator >::getElementFast( const IndexType row, - const IndexType column ) const -{ - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - return this->values.operator[]( this->getElementIndex( row, column ) ); -} - template< typename Real, typename Device, typename Index, @@ -380,23 +235,6 @@ Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( con return this->values.getElement( this->getElementIndex( row, column ) ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const -{ - for( IndexType i = 0; i < this->getColumns(); i++ ) - { - columns[ i ] = i; - values[ i ] = this->getElementFast( row, i ); - } -} - template< typename Real, typename Device, typename Index, @@ -448,8 +286,9 @@ typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocat const Vector& vector ) const { RealType sum( 0.0 ); - for( IndexType column = 0; column < this->getColumns(); column++ ) - sum += this->getElementFast( row, column ) * vector[ column ]; + // TODO: Fix this + //for( IndexType column = 0; column < this->getColumns(); column++ ) + // sum += this->getElementFast( row, column ) * vector[ column ]; return sum; } @@ -949,9 +788,9 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera for( IndexType i = 0; i < this->getColumns(); i++ ) { if( i == row ) - diagonalValue = this->getElementFast( row, row ); + diagonalValue = this->getElement( row, row ); else - sum += this->getElementFast( row, i ) * x[ i ]; + sum += this->getElement( 
row, i ) * x[ i ]; } x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index e5b2e9008..8460ded4d 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -125,8 +125,8 @@ public: preCommPatternEnds.setLike( commPatternEnds ); for( int j = 0; j < nproc; j++ ) for( int i = 0; i < nproc; i++ ) { - preCommPatternStarts.setElementFast( j, i, span_starts.getElement( i ) ); - preCommPatternEnds.setElementFast( j, i, span_ends.getElement( i ) ); + preCommPatternStarts.setElement( j, i, span_starts.getElement( i ) ); + preCommPatternEnds.setElement( j, i, span_ends.getElement( i ) ); } // assemble the commPattern* matrices diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 8d9e9c727..fc6ea6bd2 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -537,9 +537,10 @@ void test_SetRow() IndexType row = 0; IndexType elements = 5; - m.setRow( row++, colIndexes1, row1, elements ); + // TODO: Fix this + /*m.setRow( row++, colIndexes1, row1, elements ); m.setRow( row++, colIndexes2, row2, elements ); - m.setRow( row++, colIndexes3, row3, elements ); + m.setRow( row++, colIndexes3, row3, elements );*/ EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); @@ -654,12 +655,13 @@ void test_AddRow() IndexType elements = 5; RealType thisRowMultiplicator = 0; - m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ ); + // TODO: Fix this + /*m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ ); - 
m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ ); + m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/ EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); -- GitLab From 7218a64dcb5d8a2d6fb616afc2bc66b6ca4bf1a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:47:01 +0100 Subject: [PATCH 057/179] Replacing CSRSegmentView and EllpackSegment view with one general but specialized SegmentView. --- src/TNL/Containers/Segments/CSR.h | 4 +- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/CSRSegmentView.h | 47 ------------------- src/TNL/Containers/Segments/CSRView.h | 4 +- src/TNL/Containers/Segments/CSRView.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.h | 4 +- src/TNL/Containers/Segments/Ellpack.hpp | 4 +- src/TNL/Containers/Segments/EllpackView.h | 4 +- .../{EllpackSegmentView.h => SegmentView.h} | 46 ++++++++++++++++-- src/TNL/Containers/Segments/SlicedEllpack.h | 4 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 4 +- .../Containers/Segments/SlicedEllpackView.h | 4 +- 12 files changed, 59 insertions(+), 70 deletions(-) delete mode 100644 src/TNL/Containers/Segments/CSRSegmentView.h rename src/TNL/Containers/Segments/{EllpackSegmentView.h => SegmentView.h} (51%) diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index df7cb5686..3645e9f6a 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -14,7 +14,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -35,7 +35,7 @@ class CSR using ViewTemplate = CSRView< Device_, Index_ >; using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; - using SegmentViewType = CSRSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType, true >; CSR(); diff --git a/src/TNL/Containers/Segments/CSR.hpp 
b/src/TNL/Containers/Segments/CSR.hpp index 83da548fc..8b8ddfff5 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -176,7 +176,7 @@ auto CSR< Device, Index, IndexAllocator >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { - return SegmentView( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); + return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/CSRSegmentView.h b/src/TNL/Containers/Segments/CSRSegmentView.h deleted file mode 100644 index 3ab5ef9d2..000000000 --- a/src/TNL/Containers/Segments/CSRSegmentView.h +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************** - CSRSegmentView.h - description - ------------------- - begin : Dec 28, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { - namespace Containers { - namespace Segments { - -template< typename Index > -class CSRSegmentView -{ - public: - - using IndexType = Index; - - __cuda_callable__ - CSRSegmentView( const IndexType offset, const IndexType size ) - : segmentOffset( offset ), segmentSize( size ){}; - - __cuda_callable__ - IndexType getSize() const - { - return this->segmentSize; - }; - - __cuda_callable__ - IndexType getGlobalIndex( const IndexType localIndex ) const - { - TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." 
); - return segmentOffset + localIndex; - }; - - protected: - - IndexType segmentOffset, segmentSize; -}; - } //namespace Segments - } //namespace Containers -} //namespace TNL \ No newline at end of file diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index f8bcacd0f..a0f5cd200 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -33,7 +33,7 @@ class CSRView template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; - using SegmentViewType = CSRSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType >; __cuda_callable__ CSRView(); diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index b0bb35313..bbed8e3cb 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -156,7 +156,7 @@ auto CSRView< Device, Index >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { - return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); + return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index f73155335..429615647 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Containers { @@ -37,7 +37,7 @@ class Ellpack using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; - using 
SegmentViewType = EllpackSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; Ellpack(); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index ebc2b360e..97a256c9e 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -239,9 +239,9 @@ Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( RowMajorOrder ) - return SegmentView( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); else - return SegmentView( segmentIdx, this->segmentSize, this->alignedSize ); + return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 682eeeb4a..737810498 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -13,7 +13,7 @@ #include #include -#include +#include namespace TNL { @@ -38,7 +38,7 @@ class EllpackView using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView; //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; - using SegmentViewType = EllpackSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; __cuda_callable__ EllpackView(); diff --git a/src/TNL/Containers/Segments/EllpackSegmentView.h b/src/TNL/Containers/Segments/SegmentView.h similarity index 51% rename from src/TNL/Containers/Segments/EllpackSegmentView.h rename to src/TNL/Containers/Segments/SegmentView.h index 7a1638e3f..29f2e7781 100644 --- a/src/TNL/Containers/Segments/EllpackSegmentView.h +++ b/src/TNL/Containers/Segments/SegmentView.h @@ -1,5 +1,5 @@ /*************************************************************************** 
- EllpackSegmentView.h - description + SegmentView.h - description ------------------- begin : Dec 28, 2019 copyright : (C) 2019 by Tomas Oberhuber @@ -14,17 +14,21 @@ namespace TNL { namespace Containers { namespace Segments { +template< typename Index, + bool RowMajorOrder = false > +class SegmentView; + template< typename Index > -class EllpackSegmentView +class SegmentView< Index, false > { public: using IndexType = Index; __cuda_callable__ - EllpackSegmentView( const IndexType offset, - const IndexType size, - const IndexType step ) + SegmentView( const IndexType offset, + const IndexType size, + const IndexType step ) : segmentOffset( offset ), segmentSize( size ), step( step ){}; __cuda_callable__ @@ -44,6 +48,38 @@ class EllpackSegmentView IndexType segmentOffset, segmentSize, step; }; + +template< typename Index > +class SegmentView< Index, true > +{ + public: + + using IndexType = Index; + + __cuda_callable__ + SegmentView( const IndexType offset, + const IndexType size, + const IndexType step = 1 ) // For compatibility with previous specialization + : segmentOffset( offset ), segmentSize( size ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIndex ) const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." 
); + return segmentOffset + localIndex; + }; + + protected: + + IndexType segmentOffset, segmentSize; +}; + } //namespace Segments } //namespace Containers } //namespace TNL diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 76185bcac..5953cde36 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace TNL { namespace Containers { @@ -37,7 +37,7 @@ class SlicedEllpack template< typename Device_, typename Index_ > using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; - using SegmentViewType = EllpackSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; SlicedEllpack(); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index b58b6a954..76790f393 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -269,9 +269,9 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; if( RowMajorOrder ) - return SegmentView( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); + return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else - return SegmentView( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); + return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index e87c75229..86745e7c0 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -13,7 +13,7 @@ #include #include 
-#include +#include namespace TNL { namespace Containers { @@ -36,7 +36,7 @@ class SlicedEllpackView using ViewTemplate = SlicedEllpackView< Device_, Index_ >; using ViewType = SlicedEllpackView; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; - using SegmentViewType = EllpackSegmentView< IndexType >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; __cuda_callable__ SlicedEllpackView(); -- GitLab From 922a92e213835aaba2581274ecd0f89c3d097312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 11:51:14 +0100 Subject: [PATCH 058/179] Deleted useless file Containers/Segments.h. --- src/TNL/Containers/Segments.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 src/TNL/Containers/Segments.h diff --git a/src/TNL/Containers/Segments.h b/src/TNL/Containers/Segments.h deleted file mode 100644 index 99ea22357..000000000 --- a/src/TNL/Containers/Segments.h +++ /dev/null @@ -1,29 +0,0 @@ -/*************************************************************************** - Segments.h - description - ------------------- - begin : Nov 29, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Containers { - -template< typename Value, - typename Organization > -class Segments -{ - public: - - using ValueType = Value; - using OrganizationType = Organization; - using IndexType = typename Organization::IndexType; - -}; - -} // namespace Conatiners -} // namespace TNL \ No newline at end of file -- GitLab From 0265b5936104b53823def31271ced3facd892116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 12:09:56 +0100 Subject: [PATCH 059/179] Added segments to dense matrix. 
--- src/TNL/Matrices/Dense.h | 7 +++++-- src/TNL/Matrices/Dense.hpp | 42 ++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index a2c6a7eda..cff1d57b4 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -14,10 +14,10 @@ #include #include #include -#include +#include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class DenseDeviceDependentCode; @@ -46,6 +46,7 @@ public: using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; using BaseType = Matrix< Real, Device, Index >; using MatrixRow = DenseRow< Real, Index >; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; template< typename _Real = Real, typename _Device = Device, @@ -164,6 +165,8 @@ protected: typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; friend class DenseDeviceDependentCode< DeviceType >; + + SegmentsType segments; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 190052390..bed7a37b7 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -31,7 +31,9 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationType() +String +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationType() { return String( "Matrices::Dense< " ) + getType< RealType >() + ", " + @@ -44,7 +46,9 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getSerializationTypeVirtual() const +String +Dense< Real, Device, Index, RowMajorOrder, RealAllocator 
>:: +getSerializationTypeVirtual() const { return this->getSerializationType(); } @@ -54,12 +58,15 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setDimensions( const IndexType rows, - const IndexType columns ) +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); + this->segments.setSegmentsSizes( rows, columns ); this->values.setSize( rows * columns ); - this->values.setValue( 0.0 ); + this->values = 0.0; } template< typename Real, @@ -68,7 +75,9 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator > template< typename Matrix_ > -void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setLike( const Matrix_& matrix ) +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setLike( const Matrix_& matrix ) { Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); } @@ -78,8 +87,11 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { + this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); } template< typename Real, @@ -92,17 +104,6 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( return this->getColumns(); } -/*template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLengthFast( const IndexType row ) const -{ - return this->getColumns(); -}*/ - template< typename Real, 
typename Device, typename Index, @@ -896,13 +897,14 @@ __cuda_callable__ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, const IndexType column ) const { - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || + return this->segments.getGlobalIndex( row, column ); + /*TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value ), ) if( std::is_same< Device, Devices::Host >::value ) return row * this->columns + column; if( std::is_same< Device, Devices::Cuda >::value ) return column * this->rows + row; - return -1; + return -1;*/ } template<> -- GitLab From 616b29d73c4129a215fbb316a4fe2f3f193ddf98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 13:47:47 +0100 Subject: [PATCH 060/179] Added DenseMatrixRowView. --- src/TNL/Containers/Segments/Ellpack.h | 2 +- src/TNL/Matrices/Dense.h | 24 ++++++-- src/TNL/Matrices/Dense.hpp | 73 +++++++++---------------- src/TNL/Matrices/DenseMatrixRowView.h | 52 ++++++++++++++++++ src/TNL/Matrices/DenseMatrixRowView.hpp | 71 ++++++++++++++++++++++++ src/TNL/Matrices/SparseMatrix.h | 2 +- 6 files changed, 169 insertions(+), 55 deletions(-) create mode 100644 src/TNL/Matrices/DenseMatrixRowView.h create mode 100644 src/TNL/Matrices/DenseMatrixRowView.hpp diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 429615647..c197c7010 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -124,7 +124,7 @@ class Ellpack }; } // namespace Segements - } // namespace Conatiners + } // namespace Containers } // namespace TNL #include diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index cff1d57b4..c72b7edfa 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -12,8 +12,8 @@ #include #include +#include #include -#include #include namespace TNL { @@ -42,11 +42,16 @@ public: 
using RealType = Real; using DeviceType = Device; using IndexType = Index; - using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; - using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; using BaseType = Matrix< Real, Device, Index >; - using MatrixRow = DenseRow< Real, Index >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; template< typename _Real = Real, typename _Device = Device, @@ -81,6 +86,13 @@ public: void reset(); + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + void setValue( const RealType& v ); __cuda_callable__ @@ -103,11 +115,11 @@ public: Real getElement( const IndexType row, const IndexType column ) const; - __cuda_callable__ + /*__cuda_callable__ MatrixRow getRow( const IndexType rowIndex ); __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + const MatrixRow getRow( const IndexType rowIndex ) const;*/ template< typename Vector > __cuda_callable__ diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index bed7a37b7..bd0614ad0 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -158,6 +158,32 @@ void Dense< Real, Device, Index, 
RowMajorOrder, RealAllocator >::setValue( const this->values = value; } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + template< typename Real, typename Device, typename Index, @@ -236,46 +262,6 @@ Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( con return this->values.getElement( this->getElementIndex( row, column ) ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow -Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getRow( const IndexType rowIndex ) -{ - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - this->rows ); -} - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -const typename Dense< Real, Device, 
Index, RowMajorOrder, RealAllocator >::MatrixRow -Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getRow( const IndexType rowIndex ) const -{ - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - this->rows ); -} - template< typename Real, typename Device, typename Index, @@ -898,13 +884,6 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde const IndexType column ) const { return this->segments.getGlobalIndex( row, column ); - /*TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || - std::is_same< Device, Devices::Cuda >::value ), ) - if( std::is_same< Device, Devices::Host >::value ) - return row * this->columns + column; - if( std::is_same< Device, Devices::Cuda >::value ) - return column * this->rows + row; - return -1;*/ } template<> diff --git a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h new file mode 100644 index 000000000..84c6b141c --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixRowView.h @@ -0,0 +1,52 @@ +/*************************************************************************** + DenseMatrixRowView.h - description + ------------------- + begin : Jan 3, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView > +class DenseMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using SegmentViewType = SegmentView; + using IndexType = typename SegmentViewType::IndexType; + using ValuesViewType = ValuesView; + + 
__cuda_callable__ + DenseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values ); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const RealType& getValue( const IndexType column ) const; + + __cuda_callable__ + RealType& getValue( const IndexType column ); + + __cuda_callable__ + void setElement( const IndexType column, + const RealType& value ); + protected: + + SegmentViewType segmentView; + + ValuesViewType values; +}; + } // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/DenseMatrixRowView.hpp b/src/TNL/Matrices/DenseMatrixRowView.hpp new file mode 100644 index 000000000..1962a4d9a --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixRowView.hpp @@ -0,0 +1,71 @@ +/*************************************************************************** + DenseMatrixRowView.hpp - description + ------------------- + begin : Jan 3, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ +DenseMatrixRowView< SegmentView, ValuesView >:: +DenseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values ) + : segmentView( segmentView ), values( values ) +{ +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getSize() const -> IndexType +{ + return segmentView.getSize(); +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getValue( const IndexType column ) const -> const RealType& +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." 
); + return values[ segmentView.getGlobalIndex( column ) ]; +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getValue( const IndexType column ) -> RealType& +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." ); + return values[ segmentView.getGlobalIndex( column ) ]; +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ void +DenseMatrixRowView< SegmentView, ValuesView >:: +setElement( const IndexType column, + const RealType& value ) +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." ); + const IndexType globalIdx = segmentView.getGlobalIndex( column ); + values[ globalIdx ] = value; +} + + + } // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 8f96af169..c50f71612 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include namespace TNL { namespace Matrices { -- GitLab From 873b9a9c296f632b4f87eee8df8bbf801731d9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 16:25:20 +0100 Subject: [PATCH 061/179] Added methods forRows and rowsReduction to dense matrix. 
--- src/TNL/Matrices/Dense.h | 230 ++++++++++++++++++++----------------- src/TNL/Matrices/Dense.hpp | 126 +++++++++++++++++++- 2 files changed, 247 insertions(+), 109 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index c72b7edfa..553ecc01d 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -29,156 +29,172 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - // friend class will be needed for templated assignment operators - //template< typename Real2, typename Device2, typename Index2 > - //friend class Dense; + // friend class will be needed for templated assignment operators + //template< typename Real2, typename Device2, typename Index2 > + //friend class Dense; -public: - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - using BaseType = Matrix< Real, Device, Index >; - using ValuesType = typename BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; - using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; - using SegmentViewType = typename SegmentsType::SegmentViewType; - using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = Matrix< Real, Device, Index >; + using ValuesType = 
typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; - // TODO: remove this - using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; - using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Dense< _Real, _Device, _Index >; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; - Dense(); + Dense(); - static String getSerializationType(); + static String getSerializationType(); - virtual String getSerializationTypeVirtual() const; + virtual String getSerializationTypeVirtual() const; - void setDimensions( const IndexType rows, - const IndexType columns ); + void setDimensions( const IndexType rows, + const IndexType columns ); - template< typename Matrix > - void setLike( const Matrix& matrix ); + template< typename Matrix > + void setLike( const Matrix& matrix ); - /**** - * This method is only for the compatibility with the sparse matrices. - */ - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + /**** + * This method is only for the compatibility with the sparse matrices. 
+ */ + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - [[deprecated]] - IndexType getRowLength( const IndexType row ) const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowLength() const; + IndexType getMaxRowLength() const; - IndexType getNumberOfMatrixElements() const; + IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; + IndexType getNumberOfNonzeroMatrixElements() const; - void reset(); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - __cuda_callable__ - const RowView getRow( const IndexType& rowIdx ) const; - __cuda_callable__ - RowView getRow( const IndexType& rowIdx ); + void reset(); + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; - void setValue( const RealType& v ); + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); - __cuda_callable__ - Real& operator()( const IndexType row, - const IndexType column ); - __cuda_callable__ - const Real& operator()( const IndexType row, - const IndexType column ) const; + void setValue( const RealType& v ); - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + __cuda_callable__ + Real& operator()( const IndexType row, + const IndexType column ); - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + __cuda_callable__ + const Real& operator()( const IndexType row, + const IndexType column ) const; - Real getElement( const IndexType row, - const IndexType column ) const; + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); - /*__cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - 
__cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const;*/ + Real getElement( const IndexType row, + const IndexType column ) const; - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - template< typename InVector, typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - template< typename Matrix > - void addMatrix( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - template< typename Matrix1, typename Matrix2, int tileDim = 32 > - void getMatrixProduct( const Matrix1& matrix1, - const Matrix2& matrix2, - const RealType& matrix1Multiplicator = 1.0, - const RealType& matrix2Multiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - template< typename Matrix, int tileDim = 32 > - void getTransposition( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0 ); + template< typename Function > + void forAllRows( Function& function ) const; - template< typename Vector1, typename Vector2 > - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + template< typename Function > + void forAllRows( Function& function ); - // copy assignment - Dense& operator=( const Dense& matrix ); + template< typename Vector > + 
__cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename InVector, typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - void save( const String& fileName ) const; + template< typename Matrix > + void addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - void load( const String& fileName ); + template< typename Matrix1, typename Matrix2, int tileDim = 32 > + void getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator = 1.0, + const RealType& matrix2Multiplicator = 1.0 ); - void save( File& file ) const; + template< typename Matrix, int tileDim = 32 > + void getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0 ); - void load( File& file ); + template< typename Vector1, typename Vector2 > + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - void print( std::ostream& str ) const; + // copy assignment + Dense& operator=( const Dense& matrix ); -protected: + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void save( File& file ) const; + + void load( File& file ); + + void print( std::ostream& str ) const; + + protected: + + __cuda_callable__ + 
IndexType getElementIndex( const IndexType row, + const IndexType column ) const; - typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class DenseDeviceDependentCode< DeviceType >; + typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class DenseDeviceDependentCode< DeviceType >; - SegmentsType segments; + SegmentsType segments; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index bd0614ad0..680fa3ed2 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -94,6 +94,31 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + template< typename Real, typename Device, typename Index, @@ -256,12 +281,109 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElement( const IndexType row, - const IndexType column ) const +Real +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, + const IndexType column ) const { return 
this->values.getElement( this->getElementIndex( row, column ) ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto values_view = this->values.getConstView(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + 
template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + template< typename Real, typename Device, typename Index, -- GitLab From 3f278b0e1925829b50a81afc9368acdcb2de368f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 21:25:42 +0100 Subject: [PATCH 062/179] Fixing dense matrix unit tests. 
--- src/TNL/Matrices/Dense.h | 11 +- src/TNL/Matrices/Dense.hpp | 11 + src/UnitTests/Matrices/DenseMatrixTest.h | 419 ++++++++++++++--------- 3 files changed, 266 insertions(+), 175 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 553ecc01d..51308280d 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -45,7 +45,7 @@ class Dense : public Matrix< Real, Device, Index > using BaseType = Matrix< Real, Device, Index >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; - using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder >; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; @@ -60,6 +60,8 @@ class Dense : public Matrix< Real, Device, Index > Dense(); + Dense( const IndexType rows, const IndexType columns ); + static String getSerializationType(); virtual String getSerializationTypeVirtual() const; @@ -75,6 +77,9 @@ class Dense : public Matrix< Real, Device, Index > */ void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + [[deprecated]] IndexType getRowLength( const IndexType row ) const; @@ -84,10 +89,6 @@ class Dense : public Matrix< Real, Device, Index > IndexType getNumberOfNonzeroMatrixElements() const; - template< typename Vector > - void getCompressedRowLengths( Vector& rowLengths ) const; - - void reset(); __cuda_callable__ diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 680fa3ed2..ebf2c03b9 100644 --- a/src/TNL/Matrices/Dense.hpp +++ 
b/src/TNL/Matrices/Dense.hpp @@ -26,6 +26,17 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense() { } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Dense( const IndexType rows, const IndexType columns ) +{ + this->setDimensions( rows, columns ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index fc6ea6bd2..2184e6360 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -92,6 +94,58 @@ void test_SetLike() EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + + // Insert values into the rows. 
+ RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + template< typename Matrix > void test_GetRowLength() { @@ -508,142 +562,147 @@ void test_AddElement() template< typename Matrix > void test_SetRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 3x7 dense matrix: - * - * / 1 2 3 4 5 6 7 \ - * | 8 9 10 11 12 13 14 | - * \ 15 16 17 18 19 20 21 / - */ - const IndexType rows = 3; - const IndexType cols = 7; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); - - RealType row1 [ 5 ] = { 11, 11, 11, 11, 11 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row2 [ 5 ] = { 22, 22, 22, 22, 22 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 }; 
- RealType row3 [ 5 ] = { 33, 33, 33, 33, 33 }; IndexType colIndexes3 [ 5 ] = { 2, 3, 4, 5, 6 }; - - IndexType row = 0; - IndexType elements = 5; - - // TODO: Fix this - /*m.setRow( row++, colIndexes1, row1, elements ); - m.setRow( row++, colIndexes2, row2, elements ); - m.setRow( row++, colIndexes3, row3, elements );*/ - - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 11 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - EXPECT_EQ( m.getElement( 0, 6 ), 7 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 22 ); - EXPECT_EQ( m.getElement( 1, 1 ), 22 ); - EXPECT_EQ( m.getElement( 1, 2 ), 22 ); - EXPECT_EQ( m.getElement( 1, 3 ), 22 ); - EXPECT_EQ( m.getElement( 1, 4 ), 22 ); - EXPECT_EQ( m.getElement( 1, 5 ), 13 ); - EXPECT_EQ( m.getElement( 1, 6 ), 14 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 15 ); - EXPECT_EQ( m.getElement( 2, 1 ), 16 ); - EXPECT_EQ( m.getElement( 2, 2 ), 33 ); - EXPECT_EQ( m.getElement( 2, 3 ), 33 ); - EXPECT_EQ( m.getElement( 2, 4 ), 33 ); - EXPECT_EQ( m.getElement( 2, 5 ), 33 ); - EXPECT_EQ( m.getElement( 2, 6 ), 33 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 3x7 dense matrix: + * + * / 1 2 3 4 5 6 7 \ + * | 8 9 10 11 12 13 14 | + * \ 15 16 17 18 19 20 21 / + */ + const IndexType rows = 3; + const IndexType cols = 7; + + TNL::Pointers::SharedPointer< Matrix > m; + m->reset(); + m->setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m->setElement( i, j, value++ ); + + Matrix* m_ptr = &m.template modifyData< DeviceType >(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 3 ][ 5 ] { + { 11, 11, 11, 11, 11 }, + { 22, 22, 22, 22, 22 }, + { 33, 33, 33, 33, 33 } }; + 
IndexType columnIndexes[ 3 ][ 5 ] { + { 0, 1, 2, 3, 4 }, + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 5, 6 } }; + auto row = m_ptr->getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + }; + TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + + EXPECT_EQ( m->getElement( 0, 0 ), 11 ); + EXPECT_EQ( m->getElement( 0, 1 ), 11 ); + EXPECT_EQ( m->getElement( 0, 2 ), 11 ); + EXPECT_EQ( m->getElement( 0, 3 ), 11 ); + EXPECT_EQ( m->getElement( 0, 4 ), 11 ); + EXPECT_EQ( m->getElement( 0, 5 ), 6 ); + EXPECT_EQ( m->getElement( 0, 6 ), 7 ); + + EXPECT_EQ( m->getElement( 1, 0 ), 22 ); + EXPECT_EQ( m->getElement( 1, 1 ), 22 ); + EXPECT_EQ( m->getElement( 1, 2 ), 22 ); + EXPECT_EQ( m->getElement( 1, 3 ), 22 ); + EXPECT_EQ( m->getElement( 1, 4 ), 22 ); + EXPECT_EQ( m->getElement( 1, 5 ), 13 ); + EXPECT_EQ( m->getElement( 1, 6 ), 14 ); + + EXPECT_EQ( m->getElement( 2, 0 ), 15 ); + EXPECT_EQ( m->getElement( 2, 1 ), 16 ); + EXPECT_EQ( m->getElement( 2, 2 ), 33 ); + EXPECT_EQ( m->getElement( 2, 3 ), 33 ); + EXPECT_EQ( m->getElement( 2, 4 ), 33 ); + EXPECT_EQ( m->getElement( 2, 5 ), 33 ); + EXPECT_EQ( m->getElement( 2, 6 ), 33 ); } template< typename Matrix > void test_AddRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); - - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( 
m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. 
-/* - * The following setup results in the following 6x5 sparse matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + TNL::Pointers::SharedPointer< Matrix > m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m->setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m->getElement( 0, 0 ), 1 ); + EXPECT_EQ( m->getElement( 0, 1 ), 2 ); + EXPECT_EQ( m->getElement( 0, 2 ), 3 ); + EXPECT_EQ( m->getElement( 0, 3 ), 4 ); + EXPECT_EQ( m->getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m->getElement( 1, 0 ), 6 ); + EXPECT_EQ( m->getElement( 1, 1 ), 7 ); + EXPECT_EQ( m->getElement( 1, 2 ), 8 ); + EXPECT_EQ( m->getElement( 1, 3 ), 9 ); + EXPECT_EQ( m->getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m->getElement( 2, 0 ), 11 ); + EXPECT_EQ( m->getElement( 2, 1 ), 12 ); + EXPECT_EQ( m->getElement( 2, 2 ), 13 ); + EXPECT_EQ( m->getElement( 2, 3 ), 14 ); + EXPECT_EQ( m->getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m->getElement( 3, 0 ), 16 ); + EXPECT_EQ( m->getElement( 3, 1 ), 17 ); + EXPECT_EQ( m->getElement( 3, 2 ), 18 ); + EXPECT_EQ( m->getElement( 3, 3 ), 19 ); + EXPECT_EQ( m->getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m->getElement( 4, 0 ), 21 ); + EXPECT_EQ( m->getElement( 4, 1 ), 22 ); + EXPECT_EQ( m->getElement( 4, 2 ), 23 ); + EXPECT_EQ( m->getElement( 4, 3 ), 24 ); + EXPECT_EQ( m->getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m->getElement( 5, 0 ), 26 ); + EXPECT_EQ( m->getElement( 5, 1 ), 27 ); + EXPECT_EQ( m->getElement( 
5, 2 ), 28 ); + EXPECT_EQ( m->getElement( 5, 3 ), 29 ); + EXPECT_EQ( m->getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 }; RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 }; RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 }; @@ -662,42 +721,62 @@ void test_AddRow() m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ ); m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/ - - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 28 ); - EXPECT_EQ( m.getElement( 1, 1 ), 29 ); - EXPECT_EQ( m.getElement( 1, 2 ), 30 ); - EXPECT_EQ( m.getElement( 1, 3 ), 31 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 55 ); - EXPECT_EQ( m.getElement( 2, 1 ), 57 ); - EXPECT_EQ( m.getElement( 2, 2 ), 59 ); - EXPECT_EQ( m.getElement( 2, 3 ), 61 ); - EXPECT_EQ( m.getElement( 2, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 92 ); - EXPECT_EQ( m.getElement( 3, 1 ), 95 ); - EXPECT_EQ( m.getElement( 3, 2 ), 98 ); - EXPECT_EQ( m.getElement( 3, 3 ), 101 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 139 ); - EXPECT_EQ( m.getElement( 4, 1 ), 143 ); - EXPECT_EQ( m.getElement( 4, 2 ), 147 ); - EXPECT_EQ( m.getElement( 4, 3 ), 151 ); - EXPECT_EQ( m.getElement( 4, 4 ), 100 ); - 
- EXPECT_EQ( m.getElement( 5, 0 ), 196 ); - EXPECT_EQ( m.getElement( 5, 1 ), 201 ); - EXPECT_EQ( m.getElement( 5, 2 ), 206 ); - EXPECT_EQ( m.getElement( 5, 3 ), 211 ); - EXPECT_EQ( m.getElement( 5, 4 ), 150 ); + + Matrix* m_ptr = &m.template modifyData< DeviceType >(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 5 ] { + { 11, 11, 11, 11, 0 }, + { 22, 22, 22, 22, 0 }, + { 33, 33, 33, 33, 0 }, + { 44, 44, 44, 44, 0 }, + { 55, 55, 55, 55, 0 }, + { 66, 66, 66, 66, 0 } }; + auto row = m_ptr->getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + + EXPECT_EQ( m->getElement( 0, 0 ), 11 ); + EXPECT_EQ( m->getElement( 0, 1 ), 11 ); + EXPECT_EQ( m->getElement( 0, 2 ), 11 ); + EXPECT_EQ( m->getElement( 0, 3 ), 11 ); + EXPECT_EQ( m->getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m->getElement( 1, 0 ), 28 ); + EXPECT_EQ( m->getElement( 1, 1 ), 29 ); + EXPECT_EQ( m->getElement( 1, 2 ), 30 ); + EXPECT_EQ( m->getElement( 1, 3 ), 31 ); + EXPECT_EQ( m->getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m->getElement( 2, 0 ), 55 ); + EXPECT_EQ( m->getElement( 2, 1 ), 57 ); + EXPECT_EQ( m->getElement( 2, 2 ), 59 ); + EXPECT_EQ( m->getElement( 2, 3 ), 61 ); + EXPECT_EQ( m->getElement( 2, 4 ), 30 ); + + EXPECT_EQ( m->getElement( 3, 0 ), 92 ); + EXPECT_EQ( m->getElement( 3, 1 ), 95 ); + EXPECT_EQ( m->getElement( 3, 2 ), 98 ); + EXPECT_EQ( m->getElement( 3, 3 ), 101 ); + EXPECT_EQ( m->getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m->getElement( 4, 0 ), 139 ); + EXPECT_EQ( m->getElement( 4, 1 ), 143 ); + EXPECT_EQ( m->getElement( 4, 2 ), 147 ); + EXPECT_EQ( m->getElement( 4, 3 ), 151 ); + EXPECT_EQ( m->getElement( 4, 4 ), 100 ); + + EXPECT_EQ( m->getElement( 5, 0 ), 196 ); + EXPECT_EQ( m->getElement( 5, 1 ), 201 ); + EXPECT_EQ( m->getElement( 
5, 2 ), 206 ); + EXPECT_EQ( m->getElement( 5, 3 ), 211 ); + EXPECT_EQ( m->getElement( 5, 4 ), 150 ); } template< typename Matrix > @@ -1352,12 +1431,12 @@ TYPED_TEST( MatrixTest, addRowTest ) test_AddRow< MatrixType >(); } -TYPED_TEST( MatrixTest, vectorProductTest ) +/*TYPED_TEST( MatrixTest, vectorProductTest ) { using MatrixType = typename TestFixture::MatrixType; test_VectorProduct< MatrixType >(); -} +}*/ TYPED_TEST( MatrixTest, addMatrixTest ) { @@ -1499,6 +1578,6 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda ) } #endif -#endif +#endif // HAVE_GTEST #include "../main.h" -- GitLab From 7cb2f2d4f1321c96439265699c44d91ae2bb1981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 21:51:52 +0100 Subject: [PATCH 063/179] Restoring setCompressedRowLengthsTest unit test for sparse matrices. --- src/UnitTests/Matrices/SparseMatrixTest.hpp | 15 ++++----------- .../Matrices/SparseMatrixTest_CSR_segments.h | 19 ++++++------------- .../SparseMatrixTest_SlicedEllpack_segments.h | 19 ++++++------------- 3 files changed, 16 insertions(+), 37 deletions(-) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index df06d28fc..bf261aa84 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -113,17 +113,10 @@ void test_SetCompressedRowLengths() for( IndexType i = 0; i < 8; i++ ) // 9th row m.setElement( 9, i, value++ ); - - EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 ); - EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 ); - EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 ); - EXPECT_EQ( m.getNonZeroRowLength( 3 ), 2 ); - EXPECT_EQ( m.getNonZeroRowLength( 4 ), 3 ); - EXPECT_EQ( m.getNonZeroRowLength( 5 ), 4 ); - EXPECT_EQ( m.getNonZeroRowLength( 6 ), 5 ); - EXPECT_EQ( m.getNonZeroRowLength( 7 ), 6 ); - EXPECT_EQ( m.getNonZeroRowLength( 8 ), 7 ); - EXPECT_EQ( m.getNonZeroRowLength( 9 ), 8 ); + rowLengths = 0; + m.getCompressedRowLengths( 
rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); } template< typename Matrix1, typename Matrix2 > diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 34f7b4f70..5ac3dde26 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -66,19 +66,12 @@ TYPED_TEST( CSRMatrixTest, setDimensionsTest ) test_SetDimensions< CSRMatrixType >(); } -//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) -//{ -//// using CSRMatrixType = typename TestFixture::CSRMatrixType; -// -//// test_SetCompressedRowLengths< CSRMatrixType >(); -// -// bool testRan = false; -// EXPECT_TRUE( testRan ); -// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -// std::cout << " This test is dependent on the input format. \n"; -// std::cout << " Almost every format allocates elements per row differently.\n\n"; -// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; -//} +TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetCompressedRowLengths< CSRMatrixType >(); +} TYPED_TEST( CSRMatrixTest, setLikeTest ) { diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index 8eba34a2b..de5356f3a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -78,19 +78,12 @@ TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) test_SetDimensions< SlicedEllpackMatrixType >(); } -//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) -//{ -//// using SlicedEllpackMatrixType = typename 
TestFixture::SlicedEllpackMatrixType; -// -//// test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); -// -// bool testRan = false; -// EXPECT_TRUE( testRan ); -// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -// std::cout << " This test is dependent on the input format. \n"; -// std::cout << " Almost every format allocates elements per row differently.\n\n"; -// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; -//} +TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +} TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest ) { -- GitLab From 5043ac64c3847dc5eb1a8e00f54e5dcf52bd403d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 3 Jan 2020 22:03:56 +0100 Subject: [PATCH 064/179] Restoring dense matrix unit tests. 
--- src/UnitTests/Matrices/CMakeLists.txt | 3 +-- src/UnitTests/Matrices/DenseMatrixTest.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 668e272df..e4616f23b 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -55,8 +55,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -# TODO: DenseMatrixTest is not finished -#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) #### # Segments tests diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 2184e6360..c7ada1240 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -585,6 +585,7 @@ void test_SetRow() for( IndexType j = 0; j < cols; j++ ) m->setElement( i, j, value++ ); + // TODO: replace this with dense matrix view Matrix* m_ptr = &m.template modifyData< DeviceType >(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { RealType values[ 3 ][ 5 ] { @@ -597,7 +598,7 @@ void test_SetRow() { 2, 3, 4, 5, 6 } }; auto row = m_ptr->getRow( rowIdx ); for( IndexType i = 0; i < 5; i++ ) - row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + / row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); }; TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); -- GitLab From 7049c029b5218291c7cca95ad9b5d93fa44745e9 Mon 
Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Sat, 4 Jan 2020 22:10:15 +0100 Subject: [PATCH 065/179] Added DenseMatrixView. --- src/TNL/Matrices/Dense.h | 7 + src/TNL/Matrices/Dense.hpp | 34 +- src/TNL/Matrices/DenseMatrixView.h | 207 +++++ src/TNL/Matrices/DenseMatrixView.hpp | 1068 ++++++++++++++++++++++ src/UnitTests/Matrices/DenseMatrixTest.h | 4 +- 5 files changed, 1317 insertions(+), 3 deletions(-) create mode 100644 src/TNL/Matrices/DenseMatrixView.h create mode 100644 src/TNL/Matrices/DenseMatrixView.hpp diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 51308280d..18249a7b1 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -14,6 +14,7 @@ #include #include #include +#include #include namespace TNL { @@ -47,6 +48,8 @@ class Dense : public Matrix< Real, Device, Index > using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; + using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; // TODO: remove this @@ -61,6 +64,10 @@ class Dense : public Matrix< Real, Device, Index > Dense(); Dense( const IndexType rows, const IndexType columns ); + + ViewType getView(); + + ConstViewType getConstView() const; static String getSerializationType(); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index ebf2c03b9..85f1b560d 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Dense_impl.h - description + Dense.hpp - description 
------------------- begin : Nov 29, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -37,6 +37,38 @@ Dense( const IndexType rows, const IndexType columns ) this->setDimensions( rows, columns ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + template< typename, typename, typename > class Segments, + typename RealAllocator > +auto +Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + template< typename, typename, typename > class Segments, + typename RealAllocator > +auto +Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->segments.getConstView() ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h new file mode 100644 index 000000000..2334eb636 --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -0,0 +1,207 @@ +/*************************************************************************** + DenseMatrixView.h - description + ------------------- + begin : Nov 29, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +//template< typename Device > +//class DenseDeviceDependentCode; + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > 
+class DenseMatrixView : public MatrixView< Real, Device, Index > +{ + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; + + // friend class will be needed for templated assignment operators + //template< typename Real2, typename Device2, typename Index2 > + //friend class Dense; + + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = Matrix< Real, Device, Index >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; + using SegmentsViewType = typename SegmentsType::ViewType; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; + + __cuda_callable__ + DenseMatrixView(); + + __cuda_callable__ + DenseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const SegmentsViewType& segments ); + + __cuda_callable__ + DenseMatrixView( const DenseMatrixView& m ) = default; + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector > + void 
getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + IndexType getNumberOfMatrixElements() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + + void setValue( const RealType& v ); + + __cuda_callable__ + Real& operator()( const IndexType row, + const IndexType column ); + + __cuda_callable__ + const Real& operator()( const IndexType row, + const IndexType column ) const; + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + Real getElement( const IndexType row, + const IndexType column ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, typename OutVector > + void vectorProduct( const InVector& 
inVector, + OutVector& outVector ) const; + + template< typename Matrix > + void addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Matrix1, typename Matrix2, int tileDim = 32 > + void getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator = 1.0, + const RealType& matrix2Multiplicator = 1.0 ); + + template< typename Matrix, int tileDim = 32 > + void getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + // copy assignment + Dense& operator=( const Dense& matrix ); + + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void save( File& file ) const; + + void load( File& file ); + + void print( std::ostream& str ) const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; + + typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class DenseDeviceDependentCode< DeviceType >; + + SegmentsViewType segments; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp new file mode 100644 index 000000000..18d6574ac --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -0,0 +1,1068 @@ +/*************************************************************************** + DenseMatrixView.hpp - description + ------------------- + begin : Nov 29, 2013 + copyright : (C) 2013 by Tomas Oberhuber 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes, + const SegmentsViewType& segments ) + : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->columnIndexes.getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->getColumnsIndexes().getConstView(), + this->segments.getConstView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool 
RowMajorOrder > +String +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return String( "Matrices::Dense< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const +{ + return this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const +{ + return this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() 
const +{ + return this->getRows() * this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset() +{ + Matrix< Real, Device, Index >::reset(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value ) +{ + this->values = value; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, + const IndexType column ) +{ + TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); + TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); + + return this->values.operator[]( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, + const IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); + TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." 
); + + return this->values.operator[]( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->values.setElement( this->getElementIndex( row, column ), value ); + return true; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + const IndexType elementIndex = this->getElementIndex( row, column ); + if( thisElementMultiplicator == 1.0 ) + this->values.setElement( elementIndex, + this->values.getElement( elementIndex ) + value ); + else + this->values.setElement( elementIndex, + thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); + return true; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, + const IndexType column ) const +{ + return this->values.getElement( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto values_view = this->values.getConstView(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( 
fetch( IndexType(), IndexType(), RealType() ) ) { + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, columnIdx, values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder 
>:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + RealType sum( 0.0 ); + // TODO: Fix this + //for( IndexType column = 0; column < this->getColumns(); column++ ) + // sum += this->getElementFast( row, column ) * vector[ column ]; + return sum; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename InVector, + typename OutVector > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector, + OutVector& outVector ) const +{ + TNL_ASSERT( this->getColumns() == inVector.getSize(), + std::cerr << "Matrix columns: " << this->getColumns() << std::endl + << "Vector size: " << inVector.getSize() << std::endl ); + TNL_ASSERT( this->getRows() == outVector.getSize(), + std::cerr << "Matrix rows: " << this->getRows() << std::endl + << "Vector size: " << outVector.getSize() << std::endl ); + + DeviceDependentCode::vectorProduct( *this, inVector, outVector ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Matrix > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + TNL_ASSERT( 
this->getColumns() == matrix.getColumns() && + this->getRows() == matrix.getRows(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "That matrix columns: " << matrix.getColumns() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.values; + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix1, + typename Matrix2, + int tileDim, + int tileRowBlockSize > +__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix1* matrixA, + const Matrix2* matrixB, + const Real matrixAMultiplicator, + const Real matrixBMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + /**** + * Here we compute product C = A * B. To profit from the fast + * shared memory we do it by tiles. 
+ */ + + typedef Index IndexType; + typedef Real RealType; + __shared__ Real tileA[ tileDim*tileDim ]; + __shared__ Real tileB[ tileDim*tileDim ]; + __shared__ Real tileC[ tileDim*tileDim ]; + + const IndexType& matrixARows = matrixA->getRows(); + const IndexType& matrixAColumns = matrixA->getColumns(); + const IndexType& matrixBRows = matrixB->getRows(); + const IndexType& matrixBColumns = matrixB->getColumns(); + + /**** + * Reset the tile C + */ + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0; + + /**** + * Compute the result tile coordinates + */ + const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim; + const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim; + + /**** + * Sum over the matrix tiles + */ + for( IndexType i = 0; i < matrixAColumns; i += tileDim ) + { + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixARow = resultTileRow + threadIdx.y + row; + const IndexType matrixAColumn = i + threadIdx.x; + if( matrixARow < matrixARows && matrixAColumn < matrixAColumns ) + tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixAMultiplicator * matrixA->getElementFast( matrixARow, matrixAColumn ); + + const IndexType matrixBRow = i + threadIdx.y + row; + const IndexType matrixBColumn = resultTileColumn + threadIdx.x; + if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns ) + tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn ); + } + __syncthreads(); + + const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow ); + const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i ); + const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i ); + const IndexType tileBLastColumn = + tnlCudaMin( tileDim, matrixBColumns - resultTileColumn ); + + for( IndexType 
row = 0; row < tileALastRow; row += tileRowBlockSize ) + { + RealType sum( 0.0 ); + for( IndexType j = 0; j < tileALastColumn; j++ ) + sum += tileA[ ( threadIdx.y + row )*tileDim + j ]* + tileB[ j*tileDim + threadIdx.x ]; + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum; + } + __syncthreads(); + } + + /**** + * Write the result tile to the result matrix + */ + const IndexType& matrixCRows = resultMatrix->getRows(); + const IndexType& matrixCColumns = resultMatrix->getColumns(); + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixCRow = resultTileRow + row + threadIdx.y; + const IndexType matrixCColumn = resultTileColumn + threadIdx.x; + if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns ) + resultMatrix->setElementFast( matrixCRow, + matrixCColumn, + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] ); + } + +} +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Matrix1, typename Matrix2, int tileDim > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator, + const RealType& matrix2Multiplicator ) +{ + TNL_ASSERT( matrix1.getColumns() == matrix2.getRows() && + this->getRows() == matrix1.getRows() && + this->getColumns() == matrix2.getColumns(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "Matrix1 columns: " << matrix1.getColumns() << std::endl + << "Matrix1 rows: " << matrix1.getRows() << std::endl + << "Matrix2 columns: " << matrix2.getColumns() << std::endl + << "Matrix2 rows: " << matrix2.getRows() << std::endl ); + + if( std::is_same< Device, Devices::Host >::value ) + for( IndexType i = 0; i < this->getRows(); i += tileDim ) + for( IndexType j = 0; j < this->getColumns(); j += tileDim ) + { + const 
IndexType tileRows = min( tileDim, this->getRows() - i ); + const IndexType tileColumns = min( tileDim, this->getColumns() - j ); + for( IndexType i1 = i; i1 < i + tileRows; i1++ ) + for( IndexType j1 = j; j1 < j + tileColumns; j1++ ) + this->setElementFast( i1, j1, 0.0 ); + + for( IndexType k = 0; k < matrix1.getColumns(); k += tileDim ) + { + const IndexType lastK = min( k + tileDim, matrix1.getColumns() ); + for( IndexType i1 = 0; i1 < tileRows; i1++ ) + for( IndexType j1 = 0; j1 < tileColumns; j1++ ) + for( IndexType k1 = k; k1 < lastK; k1++ ) + this->addElementFast( i + i1, j + j1, + matrix1.getElementFast( i + i1, k1 ) * matrix2.getElementFast( k1, j + j1 ) ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + const IndexType matrixProductCudaBlockSize( 256 ); + const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); + const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim ); + const IndexType cudaBlockColumns( tileDim ); + const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); + cudaBlockSize.x = cudaBlockColumns; + cudaBlockSize.y = cudaBlockRows; + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + + for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) + for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) + { + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); + if( gridIdx_x == columnGrids - 1 ) + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); + if( gridIdx_y == rowGrids - 1 ) + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + Dense* this_kernel = Cuda::passToDevice( *this ); + Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 ); + Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 ); + DenseMatrixProductKernel< Real, + Index, + Matrix1, + Matrix2, + 
tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + 3*tileDim*tileDim >>> + ( this_kernel, + matrix1_kernel, + matrix2_kernel, + matrix1Multiplicator, + matrix2Multiplicator, + gridIdx_x, + gridIdx_y ); + Cuda::freeFromDevice( this_kernel ); + Cuda::freeFromDevice( matrix1_kernel ); + Cuda::freeFromDevice( matrix2_kernel ); + } +#endif + } +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Index, + typename Matrix, + bool RowMajorOrder, + typename RealAllocator, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim 
+ threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + + } + +} + +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; + if( readColumnPosition < columns ) + { + const Index readOffset = readRowPosition * columns + readColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( readRowPosition + rowBlock < rows ) + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + 
threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; + if( writeColumnPosition < rows ) + { + const Index writeOffset = writeRowPosition * rows + writeColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( writeRowPosition + rowBlock < columns ) + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + } + } + +} + + +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Matrix, int tileDim > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getColumns() == matrix.getRows() && + this->getRows() == matrix.getColumns(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "That matrix columns: " << matrix.getColumns() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + const IndexType& columns = matrix.getColumns(); + for( IndexType i = 0; i < rows; i += tileDim ) + for( IndexType j = 0; j < columns; j += tileDim ) + for( IndexType k = i; k < i + tileDim && k < rows; k++ ) + for( IndexType l = j; l < j + tileDim && l < columns; l++ ) + this->setElement( l, k, matrixMultiplicator * matrix. 
getElement( k, l ) ); + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + const IndexType matrixProductCudaBlockSize( 256 ); + const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); + const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim ); + const IndexType cudaBlockColumns( tileDim ); + const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); + cudaBlockSize.x = cudaBlockColumns; + cudaBlockSize.y = cudaBlockRows; + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks(); + + Dense* this_device = Cuda::passToDevice( *this ); + Matrix* matrix_device = Cuda::passToDevice( matrix ); + + for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) + for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) + { + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); + if( gridIdx_x == columnGrids - 1) + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); + if( gridIdx_y == rowGrids - 1 ) + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) && + ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) ) + { + DenseTranspositionAlignedKernel< Real, + Index, + Matrix, + tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + sharedMemorySize >>> + ( this_device, + matrix_device, + matrixMultiplicator, + gridIdx_x, + gridIdx_y ); + } + else + { + DenseTranspositionNonAlignedKernel< Real, + Index, + Matrix, + tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + sharedMemorySize >>> + ( this_device, + matrix_device, + matrixMultiplicator, + gridIdx_x, + gridIdx_y ); + } + TNL_CHECK_CUDA_DEVICE; + } + 
Cuda::freeFromDevice( this_device ); + Cuda::freeFromDevice( matrix_device ); +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector1, typename Vector2 > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ), diagonalValue; + for( IndexType i = 0; i < this->getColumns(); i++ ) + { + if( i == row ) + diagonalValue = this->getElement( row, row ); + else + sum += this->getElement( row, i ) * x[ i ]; + } + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); +} + + +// copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +DenseMatrixView< Real, Device, Index, RowMajorOrder >& +DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& matrix ) +{ + this->setLike( matrix ); + this->values = matrix.values; + return *this; +} + +// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real2, typename Device2, typename Index2, typename > +DenseMatrixView< Real, Device, Index, RowMajorOrder >& +DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) +{ + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, + "unknown device" ); + static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, + "unknown device" ); + + this->setLike( matrix ); + + throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + 
typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const +{ + Matrix< Real, Device, Index >::save( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file ) +{ + Matrix< Real, Device, Index >::load( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = 0; column < this->getColumns(); column++ ) + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row, + const IndexType column ) const +{ + return this->segments.getGlobalIndex( row, column ); +} + +template<> +class DenseDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const 
DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class DenseDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + +} // namespace Matrices +} // namespace TNL diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index c7ada1240..c0f9b92ff 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -597,8 +597,8 @@ void test_SetRow() { 0, 1, 2, 3, 4 }, { 2, 3, 4, 5, 6 } }; auto row = m_ptr->getRow( rowIdx ); - for( IndexType i = 0; i < 5; i++ ) - / row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + //for( IndexType i = 0; i < 5; i++ ) + /// row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); }; TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); -- GitLab From 16f3e12af5a32bd121e86d13af5bb59c3f5bb0b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 5 Jan 2020 20:57:18 +0100 Subject: [PATCH 066/179] Fixing DenseMatrix unit tests. 
--- src/TNL/Containers/Segments/Ellpack.hpp | 10 +- src/TNL/Matrices/Dense.h | 4 +- src/TNL/Matrices/Dense.hpp | 72 ++----- src/TNL/Matrices/DenseMatrixView.h | 18 +- src/TNL/Matrices/DenseMatrixView.hpp | 162 ++++++--------- src/TNL/Matrices/details/DenseMatrix.h | 67 +++++++ src/UnitTests/Matrices/DenseMatrixTest.h | 240 ++++++++++------------- 7 files changed, 270 insertions(+), 303 deletions(-) create mode 100644 src/TNL/Matrices/details/DenseMatrix.h diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 97a256c9e..8763c2e5d 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -306,7 +306,7 @@ void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; @@ -315,8 +315,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); - for( IndexType j = begin; j < end && compute; j++ ) - reduction( aux, fetch( i, j, compute, args... ) ); + for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++ ) + reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); @@ -330,8 +330,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = storageSize; RealType aux( zero ); bool compute( true ); - for( IndexType j = begin; j < end && compute; j += alignedSize ) - reduction( aux, fetch( i, j, compute, args... ) ); + for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++ ) + reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 18249a7b1..90aa57170 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -48,8 +48,8 @@ class Dense : public Matrix< Real, Device, Index > using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; - using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; - using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; // TODO: remove this diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 85f1b560d..fe11d6759 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -41,10 +41,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, - template< typename, typename, typename > class Segments, typename RealAllocator > auto -Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: 
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getView() -> ViewType { return ViewType( this->getRows(), @@ -57,10 +56,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, - template< typename, typename, typename > class Segments, typename RealAllocator > auto -Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), @@ -451,8 +449,9 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > -void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl @@ -461,7 +460,20 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( std::cerr << "Matrix rows: " << this->getRows() << std::endl << "Vector size: " << outVector.getSize() << std::endl ); - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { + return valuesView[ offset ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, 
this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, @@ -1051,51 +1063,5 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde return this->segments.getGlobalIndex( row, column ); } -template<> -class DenseDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Real, - typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename InVector, - typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class DenseDeviceDependentCode< Devices::Cuda > -{ - public: - - typedef Devices::Cuda Device; - - template< typename Real, - typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename InVector, - typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 2334eb636..23f5d7317 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -14,14 +14,11 @@ #include #include #include -#include +#include namespace TNL { namespace Matrices { -//template< typename Device > -//class DenseDeviceDependentCode; - template< typename Real = double, typename Device = Devices::Host, typename Index = int, @@ -48,6 +45,9 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > using SegmentsViewType = typename SegmentsType::ViewType; 
using SegmentViewType = typename SegmentsType::SegmentViewType; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + // TODO: remove this using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; @@ -56,7 +56,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index > - using Self = Dense< _Real, _Device, _Index >; + using Self = DenseMatrixView< _Real, _Device, _Index >; __cuda_callable__ DenseMatrixView(); @@ -172,12 +172,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment - Dense& operator=( const Dense& matrix ); + DenseMatrixView& operator=( const DenseMatrixView& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > - Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix ); void save( const String& fileName ) const; @@ -195,8 +195,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getElementIndex( const IndexType row, const IndexType column ) const; - typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class DenseDeviceDependentCode< DeviceType >; + //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + //friend class DenseDeviceDependentCode< DeviceType >; SegmentsViewType segments; }; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 18d6574ac..08cfab843 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ 
b/src/TNL/Matrices/DenseMatrixView.hpp @@ -20,10 +20,9 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > + bool RowMajorOrder > __cuda_callable__ -DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: DenseMatrixView() { } @@ -31,27 +30,24 @@ DenseMatrixView() template< typename Real, typename Device, typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > + bool RowMajorOrder > __cuda_callable__ -DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: DenseMatrixView( const IndexType rows, - const IndexType columns, - const ValuesViewType& values, - const ColumnsIndexesViewType& columnIndexes, - const SegmentsViewType& segments ) - : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) + const IndexType columns, + const ValuesViewType& values, + const SegmentsViewType& segments ) + : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments ) { } template< typename Real, typename Device, typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > + bool RowMajorOrder > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getView() -> ViewType { return ViewType( this->getRows(), @@ -64,11 +60,10 @@ getView() -> ViewType template< typename Real, typename Device, typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > + bool RowMajorOrder > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getConstView() const -> ConstViewType { return ConstViewType( 
this->getRows(), @@ -95,8 +90,7 @@ getSerializationType() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > String DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getSerializationTypeVirtual() const @@ -107,8 +101,7 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Vector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -132,8 +125,7 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const { return this->getColumns(); @@ -142,8 +134,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const { return this->getColumns(); @@ -152,8 +143,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() c template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const { return this->getRows() * this->getColumns(); @@ -162,8 +152,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixEl template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const { const auto values_view = 
this->values.getConstView(); @@ -176,8 +165,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroM template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset() { Matrix< Real, Device, Index >::reset(); @@ -186,8 +174,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value ) { this->values = value; @@ -196,8 +183,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > __cuda_callable__ auto DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getRow( const IndexType& rowIdx ) const -> const RowView @@ -209,8 +195,7 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > __cuda_callable__ auto DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getRow( const IndexType& rowIdx ) -> RowView @@ -222,8 +207,7 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > __cuda_callable__ Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, const IndexType column ) @@ -239,8 +223,7 @@ Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const I template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > __cuda_callable__ const Real& 
DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, const IndexType column ) const @@ -256,8 +239,7 @@ const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( c template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row, const IndexType column, const RealType& value ) @@ -269,8 +251,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const In template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row, const IndexType column, const RealType& value, @@ -289,8 +270,7 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const In template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > Real DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getElement( const IndexType row, @@ -302,8 +282,7 @@ getElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -320,8 +299,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -333,8 +311,7 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer template< 
typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Function > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -352,8 +329,7 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Function > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -371,8 +347,7 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Function > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -384,8 +359,7 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Function > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: @@ -397,8 +371,7 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Vector > __cuda_callable__ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row, @@ -414,8 +387,7 @@ typename Vector::RealType DenseMatrixView< Real, Device, Index, RowMajorOrder >: template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename InVector, typename OutVector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector, @@ -428,14 +400,13 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const std::cerr << "Matrix rows: " << this->getRows() << std::endl 
<< "Vector size: " << outVector.getSize() << std::endl ); - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Matrix > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix, const RealType& matrixMultiplicator, @@ -454,7 +425,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Mat this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; } -#ifdef HAVE_CUDA +#ifdef HAVE_CUDA_______________ template< typename Real, typename Index, bool RowMajorOrder, @@ -463,7 +434,7 @@ template< typename Real, typename Matrix2, int tileDim, int tileRowBlockSize > -__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, +__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix, const Matrix1* matrixA, const Matrix2* matrixB, const Real matrixAMultiplicator, @@ -558,8 +529,7 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Matrix1, typename Matrix2, int tileDim > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1, const Matrix2& matrix2, @@ -599,7 +569,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); const IndexType matrixProductCudaBlockSize( 256 ); const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); const IndexType 
columnTiles = roundUpDivision( this->getColumns(), tileDim ); @@ -640,12 +610,12 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co Cuda::freeFromDevice( this_kernel ); Cuda::freeFromDevice( matrix1_kernel ); Cuda::freeFromDevice( matrix2_kernel ); - } + }*/ #endif } } -#ifdef HAVE_CUDA +#ifdef HAVE_CUDA________________________ template< typename Real, typename Index, typename Matrix, @@ -802,8 +772,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Matrix, int tileDim > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix, const RealType& matrixMultiplicator ) @@ -828,7 +797,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); const IndexType matrixProductCudaBlockSize( 256 ); const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim ); @@ -887,7 +856,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co TNL_CHECK_CUDA_DEVICE; } Cuda::freeFromDevice( this_device ); - Cuda::freeFromDevice( matrix_device ); + Cuda::freeFromDevice( matrix_device );*/ #endif } } @@ -895,8 +864,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Vector1, typename Vector2 > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b, const IndexType row, @@ -919,10 +887,9 @@ void DenseMatrixView< 
Real, Device, Index, RowMajorOrder >::performSORIteration( template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > DenseMatrixView< Real, Device, Index, RowMajorOrder >& -DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& matrix ) +DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -933,11 +900,10 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense& m template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > template< typename Real2, typename Device2, typename Index2, typename > DenseMatrixView< Real, Device, Index, RowMajorOrder >& -DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) +DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -953,8 +919,7 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const Dense< R template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const { Object::save( fileName ); @@ -963,8 +928,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName ) { Object::load( fileName ); @@ -973,28 +937,25 @@ void DenseMatrixView< Real, Device, Index, 
RowMajorOrder >::load( const String& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const { - Matrix< Real, Device, Index >::save( file ); + MatrixView< Real, Device, Index >::save( file ); } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file ) { - Matrix< Real, Device, Index >::load( file ); + MatrixView< Real, Device, Index >::load( file ); } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) @@ -1009,8 +970,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, - typename RealAllocator > + bool RowMajorOrder > __cuda_callable__ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row, const IndexType column ) const @@ -1018,7 +978,7 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( co return this->segments.getGlobalIndex( row, column ); } -template<> +/*template<> class DenseDeviceDependentCode< Devices::Host > { public: @@ -1062,7 +1022,7 @@ class DenseDeviceDependentCode< Devices::Cuda > { MatrixVectorProductCuda( matrix, inVector, outVector ); } -}; +};*/ } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h new file mode 100644 index 000000000..813e58bc4 --- /dev/null +++ b/src/TNL/Matrices/details/DenseMatrix.h @@ -0,0 +1,67 @@ 
+/*************************************************************************** + DenseMatrix.h - description + ------------------- + begin : Jan 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Device > +class DenseDeviceDependentCode; +template<> +class DenseDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class DenseDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + + } //namespace details + } //namepsace Matrices +} //namespace TNL \ No newline at end of file diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index c0f9b92ff..897861f7f 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include #include @@ 
-576,17 +574,16 @@ void test_SetRow() const IndexType rows = 3; const IndexType cols = 7; - TNL::Pointers::SharedPointer< Matrix > m; - m->reset(); - m->setDimensions( rows, cols ); + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m->setElement( i, j, value++ ); + m.setElement( i, j, value++ ); - // TODO: replace this with dense matrix view - Matrix* m_ptr = &m.template modifyData< DeviceType >(); + auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { RealType values[ 3 ][ 5 ] { { 11, 11, 11, 11, 11 }, @@ -596,36 +593,35 @@ void test_SetRow() { 0, 1, 2, 3, 4 }, { 0, 1, 2, 3, 4 }, { 2, 3, 4, 5, 6 } }; - auto row = m_ptr->getRow( rowIdx ); - //for( IndexType i = 0; i < 5; i++ ) - /// row.setElement( rowIdx, i ); //columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); }; - TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); - EXPECT_EQ( m->getElement( 0, 0 ), 11 ); - EXPECT_EQ( m->getElement( 0, 1 ), 11 ); - EXPECT_EQ( m->getElement( 0, 2 ), 11 ); - EXPECT_EQ( m->getElement( 0, 3 ), 11 ); - EXPECT_EQ( m->getElement( 0, 4 ), 11 ); - EXPECT_EQ( m->getElement( 0, 5 ), 6 ); - EXPECT_EQ( m->getElement( 0, 6 ), 7 ); - - EXPECT_EQ( m->getElement( 1, 0 ), 22 ); - EXPECT_EQ( m->getElement( 1, 1 ), 22 ); - EXPECT_EQ( m->getElement( 1, 2 ), 22 ); - EXPECT_EQ( m->getElement( 1, 3 ), 22 ); - EXPECT_EQ( m->getElement( 1, 4 ), 22 ); - EXPECT_EQ( m->getElement( 1, 5 ), 13 ); - EXPECT_EQ( m->getElement( 1, 6 ), 14 ); - - EXPECT_EQ( m->getElement( 2, 0 ), 15 ); - EXPECT_EQ( m->getElement( 2, 1 ), 16 ); - EXPECT_EQ( m->getElement( 2, 2 ), 33 ); - EXPECT_EQ( m->getElement( 2, 3 ), 33 ); - EXPECT_EQ( m->getElement( 2, 4 
), 33 ); - EXPECT_EQ( m->getElement( 2, 5 ), 33 ); - EXPECT_EQ( m->getElement( 2, 6 ), 33 ); + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 11 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + EXPECT_EQ( m.getElement( 0, 6 ), 7 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); + EXPECT_EQ( m.getElement( 1, 1 ), 22 ); + EXPECT_EQ( m.getElement( 1, 2 ), 22 ); + EXPECT_EQ( m.getElement( 1, 3 ), 22 ); + EXPECT_EQ( m.getElement( 1, 4 ), 22 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 14 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 2, 1 ), 16 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 33 ); + EXPECT_EQ( m.getElement( 2, 4 ), 33 ); + EXPECT_EQ( m.getElement( 2, 5 ), 33 ); + EXPECT_EQ( m.getElement( 2, 6 ), 33 ); } template< typename Matrix > @@ -648,49 +644,49 @@ void test_AddRow() const IndexType rows = 6; const IndexType cols = 5; - TNL::Pointers::SharedPointer< Matrix > m( rows, cols ); + Matrix m( rows, cols ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m->setElement( i, j, value++ ); + m.setElement( i, j, value++ ); // Check the added elements - EXPECT_EQ( m->getElement( 0, 0 ), 1 ); - EXPECT_EQ( m->getElement( 0, 1 ), 2 ); - EXPECT_EQ( m->getElement( 0, 2 ), 3 ); - EXPECT_EQ( m->getElement( 0, 3 ), 4 ); - EXPECT_EQ( m->getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m->getElement( 1, 0 ), 6 ); - EXPECT_EQ( m->getElement( 1, 1 ), 7 ); - EXPECT_EQ( m->getElement( 1, 2 ), 8 ); - EXPECT_EQ( m->getElement( 1, 3 ), 9 ); - EXPECT_EQ( m->getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m->getElement( 2, 0 ), 11 ); - EXPECT_EQ( m->getElement( 2, 1 ), 12 ); - EXPECT_EQ( m->getElement( 2, 2 ), 13 ); - EXPECT_EQ( m->getElement( 2, 3 ), 14 ); - EXPECT_EQ( m->getElement( 2, 4 ), 15 ); - - EXPECT_EQ( 
m->getElement( 3, 0 ), 16 ); - EXPECT_EQ( m->getElement( 3, 1 ), 17 ); - EXPECT_EQ( m->getElement( 3, 2 ), 18 ); - EXPECT_EQ( m->getElement( 3, 3 ), 19 ); - EXPECT_EQ( m->getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m->getElement( 4, 0 ), 21 ); - EXPECT_EQ( m->getElement( 4, 1 ), 22 ); - EXPECT_EQ( m->getElement( 4, 2 ), 23 ); - EXPECT_EQ( m->getElement( 4, 3 ), 24 ); - EXPECT_EQ( m->getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m->getElement( 5, 0 ), 26 ); - EXPECT_EQ( m->getElement( 5, 1 ), 27 ); - EXPECT_EQ( m->getElement( 5, 2 ), 28 ); - EXPECT_EQ( m->getElement( 5, 3 ), 29 ); - EXPECT_EQ( m->getElement( 5, 4 ), 30 ); + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); // Add new elements to the old elements with a multiplying factor applied to the old elements. 
/* @@ -704,26 +700,7 @@ void test_AddRow() * \ 78 81 84 87 90 / */ - RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row3 [ 5 ] = { 44, 44, 44, 44, 0 }; IndexType colIndexes3 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row4 [ 5 ] = { 55, 55, 55, 55, 0 }; IndexType colIndexes4 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row5 [ 5 ] = { 66, 66, 66, 66, 0 }; IndexType colIndexes5 [ 5 ] = { 0, 1, 2, 3, 4 }; - - IndexType row = 0; - IndexType elements = 5; - RealType thisRowMultiplicator = 0; - - // TODO: Fix this - /*m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ );*/ - - Matrix* m_ptr = &m.template modifyData< DeviceType >(); + auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { RealType values[ 6 ][ 5 ] { { 11, 11, 11, 11, 0 }, @@ -732,52 +709,51 @@ void test_AddRow() { 44, 44, 44, 44, 0 }, { 55, 55, 55, 55, 0 }, { 66, 66, 66, 66, 0 } }; - auto row = m_ptr->getRow( rowIdx ); + auto row = matrix_view.getRow( rowIdx ); for( IndexType i = 0; i < 5; i++ ) { RealType& val = row.getValue( i ); val = rowIdx * val + values[ rowIdx ][ i ]; } }; - TNL::Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); - EXPECT_EQ( m->getElement( 0, 0 ), 11 ); - EXPECT_EQ( m->getElement( 0, 1 ), 11 ); - EXPECT_EQ( m->getElement( 0, 2 ), 11 ); - EXPECT_EQ( 
m->getElement( 0, 3 ), 11 ); - EXPECT_EQ( m->getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m->getElement( 1, 0 ), 28 ); - EXPECT_EQ( m->getElement( 1, 1 ), 29 ); - EXPECT_EQ( m->getElement( 1, 2 ), 30 ); - EXPECT_EQ( m->getElement( 1, 3 ), 31 ); - EXPECT_EQ( m->getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m->getElement( 2, 0 ), 55 ); - EXPECT_EQ( m->getElement( 2, 1 ), 57 ); - EXPECT_EQ( m->getElement( 2, 2 ), 59 ); - EXPECT_EQ( m->getElement( 2, 3 ), 61 ); - EXPECT_EQ( m->getElement( 2, 4 ), 30 ); - - EXPECT_EQ( m->getElement( 3, 0 ), 92 ); - EXPECT_EQ( m->getElement( 3, 1 ), 95 ); - EXPECT_EQ( m->getElement( 3, 2 ), 98 ); - EXPECT_EQ( m->getElement( 3, 3 ), 101 ); - EXPECT_EQ( m->getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m->getElement( 4, 0 ), 139 ); - EXPECT_EQ( m->getElement( 4, 1 ), 143 ); - EXPECT_EQ( m->getElement( 4, 2 ), 147 ); - EXPECT_EQ( m->getElement( 4, 3 ), 151 ); - EXPECT_EQ( m->getElement( 4, 4 ), 100 ); - - EXPECT_EQ( m->getElement( 5, 0 ), 196 ); - EXPECT_EQ( m->getElement( 5, 1 ), 201 ); - EXPECT_EQ( m->getElement( 5, 2 ), 206 ); - EXPECT_EQ( m->getElement( 5, 3 ), 211 ); - EXPECT_EQ( m->getElement( 5, 4 ), 150 ); + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 31 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 55 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 2, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 92 ); + EXPECT_EQ( m.getElement( 3, 1 ), 95 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 
0 ), 139 ); + EXPECT_EQ( m.getElement( 4, 1 ), 143 ); + EXPECT_EQ( m.getElement( 4, 2 ), 147 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 100 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 196 ); + EXPECT_EQ( m.getElement( 5, 1 ), 201 ); + EXPECT_EQ( m.getElement( 5, 2 ), 206 ); + EXPECT_EQ( m.getElement( 5, 3 ), 211 ); + EXPECT_EQ( m.getElement( 5, 4 ), 150 ); } template< typename Matrix > @@ -1263,8 +1239,6 @@ void test_SaveAndLoad() EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); - - std::cout << "\nThis will create a file called '" << TEST_FILE_NAME << "' (of the matrix created in the test function), in .../tnl-dev/Debug/bin/\n\n"; } template< typename Matrix > @@ -1432,12 +1406,12 @@ TYPED_TEST( MatrixTest, addRowTest ) test_AddRow< MatrixType >(); } -/*TYPED_TEST( MatrixTest, vectorProductTest ) +TYPED_TEST( MatrixTest, vectorProductTest ) { using MatrixType = typename TestFixture::MatrixType; test_VectorProduct< MatrixType >(); -}*/ +} TYPED_TEST( MatrixTest, addMatrixTest ) { -- GitLab From 65d9b74cec3690588bce4dd3786c8a84e81565bc Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Mon, 6 Jan 2020 14:54:04 +0100 Subject: [PATCH 067/179] I --- src/TNL/Containers/Segments/CSR.hpp | 3 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 6 +- src/TNL/Matrices/Dense.h | 6 + src/TNL/Matrices/Dense.hpp | 27 +++++ src/TNL/Matrices/SparseMatrix.h | 7 ++ src/TNL/Matrices/SparseMatrix.hpp | 107 +++++++++++++++++- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 85 +++++++++++++- 7 files changed, 234 insertions(+), 7 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 8b8ddfff5..971754b5a 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -225,8 +225,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType 
end = offsetsView[ i + 1 ]; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType j = begin; j < end && compute; j++ ) - reduction( aux, fetch( i, j, compute, args... ) ); + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 76790f393..31f417df2 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -354,8 +354,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -370,8 +371,9 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); bool compute( true ); + IndexType localIdx( 0 ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 90aa57170..2e283c9e8 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -183,6 +183,12 @@ class Dense : public Matrix< Real, Device, Index > typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > + bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > + bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + void save( const String& fileName ) const; void load( const String& fileName ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index fe11d6759..ecd5aec1c 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -994,6 +994,33 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Den throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const +{ + return( this->getRows() == matrix.getRows() && + this->getColumns() == matrix.getColumns() && + this->getValues() == matrix.getValues() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator!=( const Dense< Real_, Device_, Index_, 
RowMajorOrder >& matrix ) const +{ + return ! ( *this == matrix ); +} template< typename Real, typename Device, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index c50f71612..75d917928 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -175,6 +175,13 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > */ SparseMatrix& operator=( const SparseMatrix& matrix ); + /** + * \brief Assignment of dense matrix + */ + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > + SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ); + + /** * \brief Assignment of any other matrix type. * @param matrix diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 6c0655ce0..d38b9de34 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -183,7 +183,7 @@ getCompressedRowLengths( Vector& rowLengths ) const rowLengths.setSize( this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { @@ -448,7 +448,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, 
IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); @@ -615,7 +615,108 @@ operator=( const SparseMatrix& matrix ) return *this; } -// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) +{ + using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + typename RHSMatrix::RowsCapacitiesType rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); + this->setCompressedRowLengths( rowLengths ); + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + columns_view = paddingIndex; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto this_segments_view = this->segments.getView(); + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType 
columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) + { + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); + columns_view[ thisGlobalIdx ] = columnIndex; + values_view[ thisGlobalIdx ] = value; + } + }; + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = paddingIndex; + matrixColumnsBuffer_view = paddingIndex; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + if( columnIndex != paddingIndex ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; + } + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = 
matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; + if( column != paddingIndex ) + { + columnIndex = column; + value = thisValuesBuffer_view[ bufferIdx ]; + } + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + //std::cerr << "This matrix = " << std::endl << *this << std::endl; + } + return *this; + +} + template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index f00daf1f3..6c4f8b261 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -436,6 +437,55 @@ void testConversion() } } +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, 
exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix; + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + TEST( SparseMatrixCopyTest, CSR_HostToHost ) { testCopyAssignment< CSR_host, CSR_host >(); @@ -568,6 +618,39 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) } #endif -#endif +// Dense matrix assignment test +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) +{ + denseMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host ) +{ + denseMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host ) +{ + denseMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda ) +{ + denseMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda ) +{ + denseMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda ) +{ + denseMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST #include "../main.h" -- GitLab From ad33eecb09bf3f85f18eeb921e8a2075203d3ec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 6 Jan 2020 21:20:22 +0100 Subject: [PATCH 068/179] Implemented dense to sparse matrix assignement. 
--- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 4 +- src/TNL/Matrices/Dense.h | 16 +---- src/TNL/Matrices/Dense.hpp | 40 ++++-------- src/TNL/Matrices/SparseMatrix.h | 1 + src/TNL/Matrices/SparseMatrix.hpp | 62 ++++++++++--------- src/TNL/Matrices/SparseMatrixView.hpp | 2 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 18 +++--- 8 files changed, 66 insertions(+), 79 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 971754b5a..3581748fa 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -218,7 +218,7 @@ void CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... 
args ) mutable { const IndexType begin = offsetsView[ i ]; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 31f417df2..62e2ca7d5 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes ) const auto sizes_view = sizes.getConstView(); auto slices_view = this->sliceOffsets.getView(); auto slice_segment_size_view = this->sliceSegmentSizes.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { if( globalIdx < _size ) return sizes_view[ globalIdx ]; return 0; @@ -341,7 +341,7 @@ void SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 2e283c9e8..757fa4eae 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -30,15 +30,6 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { - private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; - - // friend class will be needed for templated assignment operators - //template< typename Real2, typename Device2, typename Index2 > - //friend class Dense; - public: using RealType = Real; using DeviceType = Device; @@ -176,12 +167,11 @@ class Dense : public Matrix< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment - Dense& operator=( const Dense& matrix ); + //Dense& operator=( const Dense& matrix ); // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > + Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index ecd5aec1c..7517c6b0e 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -373,7 +373,7 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, values_view[ globalIdx ] ); + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); @@ -959,39 +959,25 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } - -// copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename 
RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& -Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix ) -{ - this->setLike( matrix ); - this->values = matrix.values; - return *this; -} - -// cross-device copy assignment -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > - template< typename Real2, typename Device2, typename Index2, typename > -Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& -Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, - "unknown device" ); - - this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); + if( RowMajorOrder == RowMajorOrder_ ) + { + this->setLike( matrix ); + this->values = matrix.getValues(); + } + else + { + + } } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 75d917928..44883a124 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -16,6 +16,7 @@ #include #include #include +#include namespace TNL { namespace Matrices { diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index d38b9de34..6aa75995f 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -448,10 +448,10 @@ 
rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) - return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return zero; }; this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); @@ -594,7 +594,7 @@ performSORIteration( const Vector1& b, Vector2& x, const RealType& omega ) const { - + return false; } // copy assignment @@ -624,7 +624,8 @@ template< typename Real, typename IndexAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& -SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) { using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >; using RHSIndexType = typename RHSMatrix::IndexType; @@ -632,27 +633,29 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename 
RHSMatrix::RealAllocatorType; - typename RHSMatrix::RowsCapacitiesType rowLengths; + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); - this->setDimensions( matrix.getRows(), matrix.getColumns() ); + this->setLike( matrix ); this->setCompressedRowLengths( rowLengths ); + Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); + rowLocalIndexes = 0; // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); + auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; if( std::is_same< DeviceType, RHSDeviceType >::value ) { - const auto this_segments_view = this->segments.getView(); const auto segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { - if( columnIndex != paddingIndex ) + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value ) mutable { + if( value != 0.0 ) { - IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); - columns_view[ thisGlobalIdx ] = columnIndex; + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ ); + columns_view[ thisGlobalIdx ] = columnIdx; values_view[ thisGlobalIdx ] = value; } }; @@ -660,15 +663,13 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea } else { - const IndexType maxRowLength = max( rowLengths ); + const IndexType maxRowLength = matrix.getColumns(); const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, 
RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); - Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); - auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); @@ -678,34 +679,40 @@ SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, Rea { const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); thisColumnsBuffer = paddingIndex; - matrixColumnsBuffer_view = paddingIndex; //// // Copy matrix elements into buffer auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { - if( columnIndex != paddingIndex ) - { - const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; - matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; - matrixValuesBuffer_view[ bufferIdx ] = value; - } + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; }; matrix.forRows( baseRow, lastRow, f1 ); //// // Copy the source matrix buffer to this matrix buffer thisValuesBuffer_view = matrixValuesBuffer_view; - thisColumnsBuffer_view = matrixColumnsBuffer_view; //// // Copy matrix elements from the buffer to the matrix + const IndexType matrix_columns = this->getColumns(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; - const IndexType column = 
thisColumnsBuffer_view[ bufferIdx ]; - if( column != paddingIndex ) + RealType inValue( 0.0 ); + IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] ); + while( inValue == 0.0 && column < matrix_columns ) { - columnIndex = column; - value = thisValuesBuffer_view[ bufferIdx ]; + bufferIdx = ( rowIdx - baseRow ) * maxRowLength + column++; + inValue = thisValuesBuffer_view[ bufferIdx ]; + } + rowLocalIndexes_view[ rowIdx ] = column; + if( inValue == 0.0 ) + { + columnIndex = paddingIndex; + value = 0.0; + } + else + { + columnIndex = column - 1; + value = inValue; } }; this->forRows( baseRow, lastRow, f2 ); @@ -749,7 +756,6 @@ operator=( const RHSMatrix& matrix ) if( std::is_same< DeviceType, RHSDeviceType >::value ) { - const auto this_segments_view = this->segments.getView(); const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { if( columnIndex != paddingIndex ) diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index df136388e..d836fe5e9 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -494,7 +494,7 @@ performSORIteration( const Vector1& b, Vector2& x, const RealType& omega ) const { - + return false; } template< typename Real, diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 6c4f8b261..8677443b2 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -443,22 +443,22 @@ void denseMatrixAssignment() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; - + const 
IndexType rows( 10 ), columns( 10 ); DenseHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) hostMatrix( i, j ) = i + j; - + Matrix matrix; matrix = hostMatrix; using RowCapacitiesType = typename Matrix::RowsCapacitiesType; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); - RowCapacitiesType exactRowLengths{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; EXPECT_EQ( rowCapacities, exactRowLengths ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) @@ -468,10 +468,14 @@ void denseMatrixAssignment() else EXPECT_EQ( matrix.getElement( i, j ), i + j ); } - + #ifdef HAVE_CUDA - DenseCuda cudaMatrix; - cudaMatrix = hostMatrix; + DenseCuda cudaMatrix( rows, columns ); + //cudaMatrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + matrix = cudaMatrix; matrix.getCompressedRowLengths( rowCapacities ); EXPECT_EQ( rowCapacities, exactRowLengths ); -- GitLab From fe457c9765fffac0090a51fb48600a9238c24857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 6 Jan 2020 22:04:40 +0100 Subject: [PATCH 069/179] Fixed segments reduction lambda function parameters. 
--- src/TNL/Containers/Segments/CSRView.hpp | 5 +++-- src/TNL/Containers/Segments/EllpackView.hpp | 8 +++++--- src/TNL/Containers/Segments/SlicedEllpackView.hpp | 8 +++++--- src/TNL/Matrices/SparseMatrix.hpp | 6 +++--- src/UnitTests/Containers/Segments/SegmentsTest.hpp | 2 +- src/UnitTests/Matrices/SparseMatrixTest.hpp | 4 ++-- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index bbed8e3cb..2d2b58331 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -195,15 +195,16 @@ void CSRView< Device, Index >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; const IndexType end = offsetsView[ i + 1 ]; RealType aux( zero ); + IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) - reduction( aux, fetch( i, j, compute, args... ) ); + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index dc6bd485d..21be88654 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -245,7 +245,7 @@ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; @@ -253,9 +253,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); + IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) - reduction( aux, fetch( i, j, compute, args... ) ); + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -268,9 +269,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = i; const IndexType end = storageSize; RealType aux( zero ); + IndexType localIdx( 0 ); bool compute( true ); for( IndexType j = begin; j < end && compute; j += alignedSize ) - reduction( aux, fetch( i, j, compute, args... ) ); + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 82570664f..5f9cbdee3 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -293,7 +293,7 @@ void SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) @@ -305,9 +305,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; const IndexType end = begin + segmentSize; RealType aux( zero ); + IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... ) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); @@ -321,9 +322,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; RealType aux( zero ); + IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) - reduction( aux, fetch( segmentIdx, globalIdx, compute, args... 
) ); + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); keeper( segmentIdx, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 6aa75995f..8dbe53f4d 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -417,12 +417,12 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { - const IndexType column = columnIndexesView[ offset ]; + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! 
compute ) return 0.0; - return valuesView[ offset ] * inVectorView[ column ]; + return valuesView[ globalIdx ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 6189c2e9a..8320fafe5 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -143,7 +143,7 @@ void test_AllReduction_MaximumInSegments() const auto v_view = v.getConstView(); auto result_view = result.getView(); - auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { return v_view[ globalIdx ]; }; auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index bf261aa84..b0a9fcb00 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -1214,7 +1214,7 @@ void test_RowsReduction() // Compute number of non-zero elements in rows. 
typename Matrix::RowsCapacitiesType rowLengths( rows ); auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { @@ -1232,7 +1232,7 @@ void test_RowsReduction() // Compute max norm TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); auto rowSums_view = rowSums.getView(); - auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return abs( value ); }; auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { -- GitLab From 50b1a44a6faecbedeb14e761dfb726281e5a2d7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 6 Jan 2020 22:45:42 +0100 Subject: [PATCH 070/179] Added dense matrix assignment test. --- src/UnitTests/Matrices/DenseMatrixTest.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 897861f7f..2ddd19c7a 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -1166,6 +1166,12 @@ void test_PerformSORIteration() EXPECT_EQ( xVector[ 3 ], 0.3671875 ); } +template< typename Matrix > +void test_AssignmentOperator() +{ + EXPECT_EQ( 1, 0 ); +} + template< typename Matrix > void test_SaveAndLoad() { -- GitLab From 74c5c158f7e76b7d6bad578de8ab4290715a9ca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 7 Jan 2020 23:10:48 +0100 Subject: [PATCH 071/179] Implemented dense matrix assignment operator. 
--- src/TNL/Matrices/Dense.h | 3 +- src/TNL/Matrices/Dense.hpp | 98 ++++++++++++++++++++++-- src/UnitTests/Matrices/DenseMatrixTest.h | 51 +++++++++++- 3 files changed, 143 insertions(+), 9 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 757fa4eae..9c05297d1 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index > using RealType = Real; using DeviceType = Device; using IndexType = Index; - using BaseType = Matrix< Real, Device, Index >; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 7517c6b0e..7a6c4becc 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, last, f ); - } template< typename Real, @@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, values_view[ globalIdx ] ); + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); - } template< typename Real, @@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } +/*template< typename Real, + typename Device, + typename 
Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) +{ + const IndexType bufferRowsCount( 128 ); + const IndexType columns = this->getColumns(); + const size_t bufferSize = bufferRowsCount * columns; + Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); + auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); + auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + sourceValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + destinationValuesBuffer = sourceValuesBuffer; + + //// + // Copy buffer to this matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + value = destinationValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + return *this; +}*/ + template< typename Real, typename Device, typename Index, @@ -969,15 +1011,57 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, 
RealAllocator_ >& matrix ) { + this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) - { - this->setLike( matrix ); this->values = matrix.getValues(); - } else { - + if( std::is_same< DeviceType, Device_ >::value ) + { + auto this_view = this->getView(); + auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + this_view.getRow( rowIdx ).setElement( columnIdx, value ); + }; + matrix.forAllRows( f ); + } + else + { + const IndexType bufferRowsCount( 128 ); + const IndexType columns = this->getColumns(); + const size_t bufferSize = bufferRowsCount * columns; + Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); + auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); + auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + sourceValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + destinationValuesBuffer = sourceValuesBuffer; + + //// + // Copy buffer to this matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; + value = destinationValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + } } + return *this; } template< typename Real, diff --git 
a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 2ddd19c7a..686602ebd 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -1169,9 +1169,51 @@ void test_PerformSORIteration() template< typename Matrix > void test_AssignmentOperator() { - EXPECT_EQ( 1, 0 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif } + template< typename Matrix > void test_SaveAndLoad() { @@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename 
TestFixture::MatrixType; -- GitLab From 09492f416df5bc2883246727b25e3eda67ddc544 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 15:23:13 +0100 Subject: [PATCH 072/179] Temporarily disabling few dense matrix unit tests. --- src/UnitTests/Matrices/DenseMatrixTest.h | 355 ++++++++++++----------- 1 file changed, 178 insertions(+), 177 deletions(-) diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 686602ebd..8c3132caf 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -26,7 +26,7 @@ using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >; static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include #include @@ -36,7 +36,7 @@ void host_test_GetType() { MatrixHostFloat mtrxHostFloat; MatrixHostInt mtrxHostInt; - + EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) ); EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) ); } @@ -57,13 +57,13 @@ void test_SetDimensions() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 9; const IndexType cols = 8; - + Matrix m; m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); } @@ -74,20 +74,20 @@ void test_SetLike() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix1 m1; m1.reset(); m1.setDimensions( rows + 1, cols + 2 ); - + Matrix2 m2; m2.reset(); m2.setDimensions( rows, cols ); - + m1.setLike( m2 ); - + EXPECT_EQ( m1.getRows(), m2.getRows() ); EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } @@ -150,14 +150,14 @@ void test_GetRowLength() 
using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRowLength( 0 ), 7 ); EXPECT_EQ( m.getRowLength( 1 ), 7 ); EXPECT_EQ( m.getRowLength( 2 ), 7 ); @@ -174,14 +174,14 @@ void test_GetNumberOfMatrixElements() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); } @@ -191,7 +191,7 @@ void test_GetNumberOfNonzeroMatrixElements() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 7x6 dense matrix: * @@ -205,19 +205,19 @@ void test_GetNumberOfNonzeroMatrixElements() */ const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. 
- + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); } @@ -227,7 +227,7 @@ void test_Reset() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 dense matrix: * @@ -239,12 +239,12 @@ void test_Reset() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.setDimensions( rows, cols ); - + m.reset(); - + EXPECT_EQ( m.getRows(), 0 ); EXPECT_EQ( m.getColumns(), 0 ); } @@ -254,7 +254,7 @@ void test_SetValue() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using IndexType = typename Matrix::IndexType; /* * Sets up the following 7x6 dense matrix: * @@ -268,110 +268,110 @@ void test_SetValue() */ const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 7 ); EXPECT_EQ( m.getElement( 1, 1 ), 8 ); EXPECT_EQ( m.getElement( 1, 2 ), 9 ); EXPECT_EQ( m.getElement( 1, 3 ), 10 ); EXPECT_EQ( m.getElement( 1, 4 ), 11 ); EXPECT_EQ( m.getElement( 1, 5 ), 12 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 13 ); EXPECT_EQ( m.getElement( 2, 1 ), 14 ); EXPECT_EQ( m.getElement( 2, 2 ), 15 ); EXPECT_EQ( m.getElement( 2, 3 ), 16 ); EXPECT_EQ( m.getElement( 2, 4 ), 17 ); EXPECT_EQ( m.getElement( 2, 5 ), 18 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 19 ); EXPECT_EQ( m.getElement( 3, 1 ), 20 ); EXPECT_EQ( m.getElement( 3, 2 ), 21 ); EXPECT_EQ( m.getElement( 3, 3 ), 22 ); EXPECT_EQ( m.getElement( 3, 4 ), 23 ); EXPECT_EQ( 
m.getElement( 3, 5 ), 24 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 25 ); EXPECT_EQ( m.getElement( 4, 1 ), 26 ); EXPECT_EQ( m.getElement( 4, 2 ), 27 ); EXPECT_EQ( m.getElement( 4, 3 ), 28 ); EXPECT_EQ( m.getElement( 4, 4 ), 29 ); EXPECT_EQ( m.getElement( 4, 5 ), 30 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 31 ); EXPECT_EQ( m.getElement( 5, 1 ), 32 ); EXPECT_EQ( m.getElement( 5, 2 ), 33 ); EXPECT_EQ( m.getElement( 5, 3 ), 34 ); EXPECT_EQ( m.getElement( 5, 4 ), 35 ); EXPECT_EQ( m.getElement( 5, 5 ), 36 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 37 ); EXPECT_EQ( m.getElement( 6, 1 ), 38 ); EXPECT_EQ( m.getElement( 6, 2 ), 39 ); EXPECT_EQ( m.getElement( 6, 3 ), 40 ); EXPECT_EQ( m.getElement( 6, 4 ), 41 ); EXPECT_EQ( m.getElement( 6, 5 ), 42 ); - + // Set the values of all elements to a certain number m.setValue( 42 ); - + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); EXPECT_EQ( m.getElement( 0, 1 ), 42 ); EXPECT_EQ( m.getElement( 0, 2 ), 42 ); EXPECT_EQ( m.getElement( 0, 3 ), 42 ); EXPECT_EQ( m.getElement( 0, 4 ), 42 ); EXPECT_EQ( m.getElement( 0, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); EXPECT_EQ( m.getElement( 1, 1 ), 42 ); EXPECT_EQ( m.getElement( 1, 2 ), 42 ); EXPECT_EQ( m.getElement( 1, 3 ), 42 ); EXPECT_EQ( m.getElement( 1, 4 ), 42 ); EXPECT_EQ( m.getElement( 1, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 42 ); EXPECT_EQ( m.getElement( 2, 1 ), 42 ); EXPECT_EQ( m.getElement( 2, 2 ), 42 ); EXPECT_EQ( m.getElement( 2, 3 ), 42 ); EXPECT_EQ( m.getElement( 2, 4 ), 42 ); EXPECT_EQ( m.getElement( 2, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 42 ); EXPECT_EQ( m.getElement( 3, 1 ), 42 ); EXPECT_EQ( m.getElement( 3, 2 ), 42 ); EXPECT_EQ( m.getElement( 3, 3 ), 42 ); EXPECT_EQ( m.getElement( 3, 4 ), 42 ); EXPECT_EQ( m.getElement( 3, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 42 ); EXPECT_EQ( m.getElement( 4, 1 ), 42 ); EXPECT_EQ( m.getElement( 4, 2 ), 42 ); EXPECT_EQ( m.getElement( 4, 3 ), 42 ); EXPECT_EQ( m.getElement( 4, 4 ), 42 ); EXPECT_EQ( m.getElement( 4, 5 
), 42 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 42 ); EXPECT_EQ( m.getElement( 5, 1 ), 42 ); EXPECT_EQ( m.getElement( 5, 2 ), 42 ); EXPECT_EQ( m.getElement( 5, 3 ), 42 ); EXPECT_EQ( m.getElement( 5, 4 ), 42 ); EXPECT_EQ( m.getElement( 5, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 42 ); EXPECT_EQ( m.getElement( 6, 1 ), 42 ); EXPECT_EQ( m.getElement( 6, 2 ), 42 ); @@ -397,40 +397,40 @@ void test_SetElement() */ const IndexType rows = 5; const IndexType cols = 5; - + Matrix m; m.reset(); - m.setDimensions( rows, cols ); - + m.setDimensions( rows, cols ); + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 23 ); @@ -456,53 +456,53 @@ void test_AddElement() */ const IndexType rows = 6; const IndexType cols = 5; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); 
EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 23 ); EXPECT_EQ( m.getElement( 4, 3 ), 24 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); EXPECT_EQ( m.getElement( 5, 1 ), 27 ); EXPECT_EQ( m.getElement( 5, 2 ), 28 ); EXPECT_EQ( m.getElement( 5, 3 ), 29 ); EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
/* * The following setup results in the following 6x5 dense matrix: @@ -518,38 +518,38 @@ void test_AddElement() RealType multiplicator = 2; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.addElement( i, j, newValue++, multiplicator ); - + m.addElement( i, j, newValue++, multiplicator ); + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); EXPECT_EQ( m.getElement( 0, 2 ), 9 ); EXPECT_EQ( m.getElement( 0, 3 ), 12 ); EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); EXPECT_EQ( m.getElement( 1, 1 ), 21 ); EXPECT_EQ( m.getElement( 1, 2 ), 24 ); EXPECT_EQ( m.getElement( 1, 3 ), 27 ); EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); EXPECT_EQ( m.getElement( 2, 1 ), 36 ); EXPECT_EQ( m.getElement( 2, 2 ), 39 ); EXPECT_EQ( m.getElement( 2, 3 ), 42 ); EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); EXPECT_EQ( m.getElement( 3, 1 ), 51 ); EXPECT_EQ( m.getElement( 3, 2 ), 54 ); EXPECT_EQ( m.getElement( 3, 3 ), 57 ); EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); EXPECT_EQ( m.getElement( 4, 1 ), 66 ); EXPECT_EQ( m.getElement( 4, 2 ), 69 ); EXPECT_EQ( m.getElement( 4, 3 ), 72 ); EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); EXPECT_EQ( m.getElement( 5, 1 ), 81 ); EXPECT_EQ( m.getElement( 5, 2 ), 84 ); @@ -718,37 +718,37 @@ void test_AddRow() }; TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); - + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); EXPECT_EQ( m.getElement( 0, 2 ), 11 ); EXPECT_EQ( m.getElement( 0, 3 ), 11 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); EXPECT_EQ( m.getElement( 1, 1 ), 29 ); EXPECT_EQ( m.getElement( 1, 2 ), 30 ); EXPECT_EQ( m.getElement( 1, 3 ), 31 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 55 ); EXPECT_EQ( 
m.getElement( 2, 1 ), 57 ); EXPECT_EQ( m.getElement( 2, 2 ), 59 ); EXPECT_EQ( m.getElement( 2, 3 ), 61 ); EXPECT_EQ( m.getElement( 2, 4 ), 30 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 92 ); EXPECT_EQ( m.getElement( 3, 1 ), 95 ); EXPECT_EQ( m.getElement( 3, 2 ), 98 ); EXPECT_EQ( m.getElement( 3, 3 ), 101 ); EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 139 ); EXPECT_EQ( m.getElement( 4, 1 ), 143 ); EXPECT_EQ( m.getElement( 4, 2 ), 147 ); EXPECT_EQ( m.getElement( 4, 3 ), 151 ); EXPECT_EQ( m.getElement( 4, 4 ), 100 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 196 ); EXPECT_EQ( m.getElement( 5, 1 ), 201 ); EXPECT_EQ( m.getElement( 5, 2 ), 206 ); @@ -773,31 +773,31 @@ void test_VectorProduct() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - + VectorType inVector; inVector.setSize( 4 ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) + for( IndexType i = 0; i < inVector.getSize(); i++ ) inVector.setElement( i, 2 ); - VectorType outVector; + VectorType outVector; outVector.setSize( 5 ); for( IndexType j = 0; j < outVector.getSize(); j++ ) outVector.setElement( j, 0 ); - - + + m.vectorProduct( inVector, outVector); - + EXPECT_EQ( outVector.getElement( 0 ), 20 ); EXPECT_EQ( outVector.getElement( 1 ), 52 ); EXPECT_EQ( outVector.getElement( 2 ), 84 ); @@ -822,16 +822,16 @@ void test_AddMatrix() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); - + /* * Sets up the following 5x4 dense matrix: * @@ -841,16 +841,16 @@ void test_AddMatrix() * | 13 14 15 16 | * \ 17 18 19 20 / */ - + Matrix m2; 
m2.reset(); m2.setDimensions( rows, cols ); - + RealType newValue = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m2.setElement( i, j, newValue++ ); - + /* * Sets up the following 5x4 dense matrix: * @@ -860,63 +860,63 @@ void test_AddMatrix() * | 13 14 15 16 | * \ 17 18 19 20 / */ - + Matrix mResult; mResult.reset(); mResult.setDimensions( rows, cols ); - + mResult = m; - + RealType matrixMultiplicator = 2; RealType thisMatrixMultiplicator = 1; - + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 
) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); EXPECT_EQ( mResult.getElement( 3, 
3 ), 48 ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); @@ -940,16 +940,16 @@ void test_GetMatrixProduct() */ const IndexType leftRows = 5; const IndexType leftCols = 4; - + Matrix leftMatrix; leftMatrix.reset(); leftMatrix.setDimensions( leftRows, leftCols ); - + RealType value = 1; for( IndexType i = 0; i < leftRows; i++ ) for( IndexType j = 0; j < leftCols; j++) leftMatrix.setElement( i, j, value++ ); - + /* * Sets up the following 4x5 dense matrix: * @@ -960,16 +960,16 @@ void test_GetMatrixProduct() */ const IndexType rightRows = 4; const IndexType rightCols = 5; - + Matrix rightMatrix; rightMatrix.reset(); rightMatrix.setDimensions( rightRows, rightCols ); - + RealType newValue = 1; for( IndexType i = 0; i < rightRows; i++ ) for( IndexType j = 0; j < rightCols; j++) rightMatrix.setElement( i, j, newValue++ ); - + /* * Sets up the following 5x5 resulting dense matrix: * @@ -979,48 +979,48 @@ void test_GetMatrixProduct() * | 0 0 0 0 | * \ 0 0 0 0 / */ - + Matrix mResult; mResult.reset(); mResult.setDimensions( leftRows, rightCols ); mResult.setValue( 0 ); - + RealType leftMatrixMultiplicator = 1; RealType rightMatrixMultiplicator = 2; -/* +/* * / 1 2 3 4 \ / 220 240 260 280 300 \ * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | * 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 10 | = | 764 848 932 1016 1100 | * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / */ - + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); 
EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); @@ -1054,36 +1054,36 @@ void test_GetTransposition() m.setElement( i, j, value++ ); m.print( std::cout ); - + /* * Sets up the following 2x3 dense matrix: * * / 0 0 0 \ * \ 0 0 0 / - */ + */ Matrix mTransposed; mTransposed.reset(); mTransposed.setDimensions( cols, rows ); - + mTransposed.print( std::cout ); - + RealType matrixMultiplicator = 1; - + mTransposed.getTransposition( m, matrixMultiplicator ); - + mTransposed.print( std::cout ); - + /* * Should result in the following 2x3 dense matrix: * * / 1 3 5 \ * \ 2 4 6 / - */ - + */ + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); - + EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); @@ -1106,60 +1106,60 @@ void test_PerformSORIteration() */ const IndexType rows = 4; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0 ); m.setElement( 0, 2, 1.0 ); m.setElement( 0, 3, 1.0 ); - + m.setElement( 1, 0, 1.0 ); // 1st row m.setElement( 1, 1, 4.0 ); m.setElement( 1, 2, 1.0 ); m.setElement( 1, 3, 
1.0 ); - + m.setElement( 2, 0, 1.0 ); m.setElement( 2, 1, 1.0 ); // 2nd row m.setElement( 2, 2, 4.0 ); m.setElement( 2, 3, 1.0 ); - + m.setElement( 3, 0, 1.0 ); // 3rd row m.setElement( 3, 1, 1.0 ); m.setElement( 3, 2, 1.0 ); m.setElement( 3, 3, 4.0 ); - + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; - + IndexType row = 0; RealType omega = 1; - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], 1.0 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 0.15625 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 0.15625 ); @@ -1230,59 +1230,59 @@ void test_SaveAndLoad() */ const IndexType rows = 4; const IndexType cols = 4; - + Matrix savedMatrix; savedMatrix.reset(); savedMatrix.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) savedMatrix.setElement( i, j, value++ ); - + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - + Matrix loadedMatrix; loadedMatrix.reset(); loadedMatrix.setDimensions( rows, cols ); - + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 
0, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); @@ -1306,33 +1306,33 @@ void test_Print() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); - + #include 
std::stringstream printed; std::stringstream couted; - + //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + auto old_buf = std::cout.rdbuf(printed.rdbuf()); m.print( std::cout ); //all the std::cout goes to ss std::cout.rdbuf(old_buf); //reset - + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; - + EXPECT_EQ( printed.str(), couted.str() ); } @@ -1380,91 +1380,91 @@ TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); TYPED_TEST( MatrixTest, setDimensionsTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetDimensions< MatrixType >(); } TYPED_TEST( MatrixTest, setLikeTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetLike< MatrixType, MatrixType >(); } TYPED_TEST( MatrixTest, getRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetRowLength< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetNumberOfMatrixElements< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); } TYPED_TEST( MatrixTest, resetTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_Reset< MatrixType >(); } TYPED_TEST( MatrixTest, setValueTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetValue< MatrixType >(); } TYPED_TEST( MatrixTest, setElementTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetElement< MatrixType >(); } TYPED_TEST( MatrixTest, addElementTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddElement< MatrixType >(); } 
TYPED_TEST( MatrixTest, setRowTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetRow< MatrixType >(); } TYPED_TEST( MatrixTest, addRowTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddRow< MatrixType >(); } TYPED_TEST( MatrixTest, vectorProductTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_VectorProduct< MatrixType >(); } TYPED_TEST( MatrixTest, addMatrixTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddMatrix< MatrixType >(); } @@ -1478,14 +1478,14 @@ TYPED_TEST( MatrixTest, assignmentOperatorTest ) TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SaveAndLoad< MatrixType >(); } TYPED_TEST( MatrixTest, printTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_Print< MatrixType >(); } @@ -1503,7 +1503,7 @@ TYPED_TEST( MatrixTest, printTest ) //} //#endif -TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) +/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) { bool testRan = false; EXPECT_TRUE( testRan ); @@ -1607,6 +1607,7 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda ) std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; } #endif + * */ #endif // HAVE_GTEST -- GitLab From 55341d35b8e67f418868e15d780cd2f8f636fdcd Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 17:06:01 +0100 Subject: [PATCH 073/179] Fixing serialization type of dense matrix. 
--- src/TNL/Matrices/Dense.h | 4 ++-- src/TNL/Matrices/Dense.hpp | 20 +++++++++---------- src/TNL/Matrices/DenseMatrixView.hpp | 14 ++++++------- .../{Tridiagonal_impl.h => Tridiagonal.hpp} | 0 4 files changed, 19 insertions(+), 19 deletions(-) rename src/TNL/Matrices/{Tridiagonal_impl.h => Tridiagonal.hpp} (100%) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 9c05297d1..778fd0bd4 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -56,7 +56,7 @@ class Dense : public Matrix< Real, Device, Index > Dense(); Dense( const IndexType rows, const IndexType columns ); - + ViewType getView(); ConstViewType getConstView() const; @@ -179,7 +179,7 @@ class Dense : public Matrix< Real, Device, Index > template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; - + void save( const String& fileName ) const; void load( const String& fileName ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 7a6c4becc..49e218c77 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -15,7 +15,7 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Real, typename Device, @@ -46,7 +46,7 @@ auto Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getView() -> ViewType { - return ViewType( this->getRows(), + return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), this->segments.getView() ); @@ -77,9 +77,9 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getSerializationType() { return String( "Matrices::Dense< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator] >"; } template< typename Real, @@ -99,7 +99,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) @@ -128,7 +128,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { @@ -322,7 +322,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Real +Real Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const @@ -447,7 +447,7 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > -void +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { @@ -857,7 +857,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTranspositio << "This matrix rows: " << this->getRows() << std::endl << "That matrix columns: " << matrix.getColumns() << std::endl << "That matrix rows: " << matrix.getRows() << std::endl ); - + if( std::is_same< Device, Devices::Host >::value ) { const IndexType& rows = matrix.getRows(); diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 08cfab843..48c0ccdc3 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -15,7 +15,7 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Real, typename Device, @@ -50,7 +50,7 @@ auto DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getView() -> ViewType { - return ViewType( this->getRows(), + return ViewType( this->getRows(), this->getColumns(), 
this->getValues().getView(), this->columnIndexes.getView(), @@ -82,9 +82,9 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getSerializationType() { return String( "Matrices::Dense< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, @@ -271,7 +271,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -Real +Real DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getElement( const IndexType row, const IndexType column ) const @@ -783,7 +783,7 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co << "This matrix rows: " << this->getRows() << std::endl << "That matrix columns: " << matrix.getColumns() << std::endl << "That matrix rows: " << matrix.getRows() << std::endl ); - + if( std::is_same< Device, Devices::Host >::value ) { const IndexType& rows = matrix.getRows(); diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal.hpp similarity index 100% rename from src/TNL/Matrices/Tridiagonal_impl.h rename to src/TNL/Matrices/Tridiagonal.hpp -- GitLab From 2cc4680c7564f2f062781eef3d4775368b98b5d1 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 17:38:17 +0100 Subject: [PATCH 074/179] Fixed Matrices::Dense::getSerializationType unit test. 
--- src/TNL/Matrices/Dense.hpp | 2 +- src/UnitTests/Matrices/DenseMatrixTest.h | 31 +- .../Matrices/TridiagonalMatrixTest.cpp | 11 + .../Matrices/TridiagonalMatrixTest.cu | 11 + .../Matrices/TridiagonalMatrixTest.h | 1614 +++++++++++++++++ 5 files changed, 1651 insertions(+), 18 deletions(-) create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.cpp create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.cu create mode 100644 src/UnitTests/Matrices/TridiagonalMatrixTest.h diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 49e218c77..c4deeb6fa 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -78,7 +78,7 @@ getSerializationType() { return String( "Matrices::Dense< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + + TNL::getSerializationType< IndexType >() + ", " + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 8c3132caf..183783ea3 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -31,24 +31,16 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; #include -template< typename MatrixHostFloat, typename MatrixHostInt > -void host_test_GetType() +void test_GetSerializationType() { - MatrixHostFloat mtrxHostFloat; - MatrixHostInt mtrxHostInt; - - EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) ); - EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) ); -} - -template< typename MatrixCudaFloat, typename MatrixCudaInt > -void cuda_test_GetType() -{ - MatrixCudaFloat mtrxCudaFloat; - MatrixCudaInt mtrxCudaInt; - - EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); - EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( 
"Matrices::Dense< int, Devices::Cuda, int >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > @@ -1377,6 +1369,11 @@ using MatrixTypes = ::testing::Types TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); +TYPED_TEST( MatrixTest, getSerializationType ) +{ + test_GetSerializationType(); +} + TYPED_TEST( MatrixTest, setDimensionsTest ) { using MatrixType = typename TestFixture::MatrixType; diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp new 
file mode 100644 index 000000000..a56349360 --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixTest.cpp - description + ------------------- + begin : Nov 10, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu new file mode 100644 index 000000000..11d45efdb --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixTest.cu - description + ------------------- + begin : Nov 10, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h new file mode 100644 index 000000000..8c3132caf --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -0,0 +1,1614 @@ +/*************************************************************************** + DenseMatrixTest.h - description + ------------------- + begin : Nov 10, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +using Dense_host_float = TNL::Matrices::Dense< float, TNL::Devices::Host, int >; +using Dense_host_int = TNL::Matrices::Dense< int, TNL::Devices::Host, int >; + +using Dense_cuda_float = TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >; +using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >; + +static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; + +#ifdef HAVE_GTEST +#include + +#include + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + MatrixHostFloat mtrxHostFloat; + MatrixHostInt mtrxHostInt; + + EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) ); + EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) ); +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + MatrixCudaFloat mtrxCudaFloat; + MatrixCudaInt mtrxCudaInt; + + EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); + EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) ); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename 
Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); + + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + + // Insert values into the rows. + RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix > +void test_GetRowLength() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + 
const IndexType rows = 8; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRowLength( 0 ), 7 ); + EXPECT_EQ( m.getRowLength( 1 ), 7 ); + EXPECT_EQ( m.getRowLength( 2 ), 7 ); + EXPECT_EQ( m.getRowLength( 3 ), 7 ); + EXPECT_EQ( m.getRowLength( 4 ), 7 ); + EXPECT_EQ( m.getRowLength( 5 ), 7 ); + EXPECT_EQ( m.getRowLength( 6 ), 7 ); + EXPECT_EQ( m.getRowLength( 7 ), 7 ); +} + +template< typename Matrix > +void test_GetNumberOfMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 7x6 dense matrix: + * + * / 0 2 3 4 5 6 \ + * | 7 8 9 10 11 12 | + * | 13 14 15 16 17 18 | + * | 19 20 21 22 23 24 | + * | 25 26 27 28 29 30 | + * | 31 32 33 34 35 36 | + * \ 37 38 39 40 41 0 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. + m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. 
+ + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 dense matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.setDimensions( rows, cols ); + + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_SetValue() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 7x6 dense matrix: + * + * / 1 2 3 4 5 6 \ + * | 7 8 9 10 11 12 | + * | 13 14 15 16 17 18 | + * | 19 20 21 22 23 24 | + * | 25 26 27 28 29 30 | + * | 31 32 33 34 35 36 | + * \ 37 38 39 40 41 42 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 7 ); + EXPECT_EQ( m.getElement( 1, 1 ), 8 ); + EXPECT_EQ( m.getElement( 1, 2 ), 9 ); + EXPECT_EQ( m.getElement( 1, 3 ), 10 ); + EXPECT_EQ( m.getElement( 1, 4 ), 11 ); + EXPECT_EQ( m.getElement( 1, 5 ), 12 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 13 ); + EXPECT_EQ( m.getElement( 2, 1 ), 14 ); + EXPECT_EQ( m.getElement( 2, 2 ), 15 ); + EXPECT_EQ( m.getElement( 2, 3 ), 16 ); + EXPECT_EQ( m.getElement( 2, 4 ), 17 ); + EXPECT_EQ( m.getElement( 2, 5 ), 
18 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 19 ); + EXPECT_EQ( m.getElement( 3, 1 ), 20 ); + EXPECT_EQ( m.getElement( 3, 2 ), 21 ); + EXPECT_EQ( m.getElement( 3, 3 ), 22 ); + EXPECT_EQ( m.getElement( 3, 4 ), 23 ); + EXPECT_EQ( m.getElement( 3, 5 ), 24 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 25 ); + EXPECT_EQ( m.getElement( 4, 1 ), 26 ); + EXPECT_EQ( m.getElement( 4, 2 ), 27 ); + EXPECT_EQ( m.getElement( 4, 3 ), 28 ); + EXPECT_EQ( m.getElement( 4, 4 ), 29 ); + EXPECT_EQ( m.getElement( 4, 5 ), 30 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 31 ); + EXPECT_EQ( m.getElement( 5, 1 ), 32 ); + EXPECT_EQ( m.getElement( 5, 2 ), 33 ); + EXPECT_EQ( m.getElement( 5, 3 ), 34 ); + EXPECT_EQ( m.getElement( 5, 4 ), 35 ); + EXPECT_EQ( m.getElement( 5, 5 ), 36 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 37 ); + EXPECT_EQ( m.getElement( 6, 1 ), 38 ); + EXPECT_EQ( m.getElement( 6, 2 ), 39 ); + EXPECT_EQ( m.getElement( 6, 3 ), 40 ); + EXPECT_EQ( m.getElement( 6, 4 ), 41 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); + + // Set the values of all elements to a certain number + m.setValue( 42 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); + EXPECT_EQ( m.getElement( 0, 1 ), 42 ); + EXPECT_EQ( m.getElement( 0, 2 ), 42 ); + EXPECT_EQ( m.getElement( 0, 3 ), 42 ); + EXPECT_EQ( m.getElement( 0, 4 ), 42 ); + EXPECT_EQ( m.getElement( 0, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); + EXPECT_EQ( m.getElement( 1, 1 ), 42 ); + EXPECT_EQ( m.getElement( 1, 2 ), 42 ); + EXPECT_EQ( m.getElement( 1, 3 ), 42 ); + EXPECT_EQ( m.getElement( 1, 4 ), 42 ); + EXPECT_EQ( m.getElement( 1, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 42 ); + EXPECT_EQ( m.getElement( 2, 1 ), 42 ); + EXPECT_EQ( m.getElement( 2, 2 ), 42 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 42 ); + EXPECT_EQ( m.getElement( 2, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 42 ); + EXPECT_EQ( m.getElement( 3, 1 ), 42 ); + EXPECT_EQ( m.getElement( 3, 2 ), 42 ); + EXPECT_EQ( m.getElement( 3, 3 ), 42 ); + EXPECT_EQ( 
m.getElement( 3, 4 ), 42 ); + EXPECT_EQ( m.getElement( 3, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 42 ); + EXPECT_EQ( m.getElement( 4, 1 ), 42 ); + EXPECT_EQ( m.getElement( 4, 2 ), 42 ); + EXPECT_EQ( m.getElement( 4, 3 ), 42 ); + EXPECT_EQ( m.getElement( 4, 4 ), 42 ); + EXPECT_EQ( m.getElement( 4, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 42 ); + EXPECT_EQ( m.getElement( 5, 1 ), 42 ); + EXPECT_EQ( m.getElement( 5, 2 ), 42 ); + EXPECT_EQ( m.getElement( 5, 3 ), 42 ); + EXPECT_EQ( m.getElement( 5, 4 ), 42 ); + EXPECT_EQ( m.getElement( 5, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 42 ); + EXPECT_EQ( m.getElement( 6, 1 ), 42 ); + EXPECT_EQ( m.getElement( 6, 2 ), 42 ); + EXPECT_EQ( m.getElement( 6, 3 ), 42 ); + EXPECT_EQ( m.getElement( 6, 4 ), 42 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); +} + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * \ 21 22 23 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 
14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + 
EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. +/* + * The following setup results in the following 6x5 dense matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.addElement( i, j, newValue++, multiplicator ); + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 12 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); + EXPECT_EQ( m.getElement( 1, 3 ), 27 ); + EXPECT_EQ( m.getElement( 1, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 36 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 45 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); + EXPECT_EQ( m.getElement( 3, 1 ), 51 ); + EXPECT_EQ( m.getElement( 3, 2 ), 54 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); + EXPECT_EQ( m.getElement( 4, 1 ), 66 ); + EXPECT_EQ( m.getElement( 4, 2 ), 69 ); + EXPECT_EQ( m.getElement( 4, 3 ), 72 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); + EXPECT_EQ( m.getElement( 5, 1 ), 81 ); + EXPECT_EQ( 
m.getElement( 5, 2 ), 84 ); + EXPECT_EQ( m.getElement( 5, 3 ), 87 ); + EXPECT_EQ( m.getElement( 5, 4 ), 90 ); +} + +template< typename Matrix > +void test_SetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 3x7 dense matrix: + * + * / 1 2 3 4 5 6 7 \ + * | 8 9 10 11 12 13 14 | + * \ 15 16 17 18 19 20 21 / + */ + const IndexType rows = 3; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 3 ][ 5 ] { + { 11, 11, 11, 11, 11 }, + { 22, 22, 22, 22, 22 }, + { 33, 33, 33, 33, 33 } }; + IndexType columnIndexes[ 3 ][ 5 ] { + { 0, 1, 2, 3, 4 }, + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 5, 6 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 11 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + EXPECT_EQ( m.getElement( 0, 6 ), 7 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); + EXPECT_EQ( m.getElement( 1, 1 ), 22 ); + EXPECT_EQ( m.getElement( 1, 2 ), 22 ); + EXPECT_EQ( m.getElement( 1, 3 ), 22 ); + EXPECT_EQ( m.getElement( 1, 4 ), 22 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 14 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 2, 1 ), 16 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 33 ); + EXPECT_EQ( 
m.getElement( 2, 4 ), 33 ); + EXPECT_EQ( m.getElement( 2, 5 ), 33 ); + EXPECT_EQ( m.getElement( 2, 6 ), 33 ); +} + +template< typename Matrix > +void test_AddRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new 
elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 5 ] { + { 11, 11, 11, 11, 0 }, + { 22, 22, 22, 22, 0 }, + { 33, 33, 33, 33, 0 }, + { 44, 44, 44, 44, 0 }, + { 55, 55, 55, 55, 0 }, + { 66, 66, 66, 66, 0 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 31 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 55 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 2, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 92 ); + EXPECT_EQ( m.getElement( 3, 1 ), 95 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 139 ); + EXPECT_EQ( m.getElement( 4, 1 ), 143 ); + EXPECT_EQ( m.getElement( 4, 2 ), 147 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 100 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 196 ); + EXPECT_EQ( m.getElement( 5, 1 ), 201 ); + EXPECT_EQ( m.getElement( 5, 2 ), 206 ); + 
EXPECT_EQ( m.getElement( 5, 3 ), 211 ); + EXPECT_EQ( m.getElement( 5, 4 ), 150 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + VectorType inVector; + inVector.setSize( 4 ); + for( IndexType i = 0; i < inVector.getSize(); i++ ) + inVector.setElement( i, 2 ); + + VectorType outVector; + outVector.setSize( 5 ); + for( IndexType j = 0; j < outVector.getSize(); j++ ) + outVector.setElement( j, 0 ); + + + m.vectorProduct( inVector, outVector); + + EXPECT_EQ( outVector.getElement( 0 ), 20 ); + EXPECT_EQ( outVector.getElement( 1 ), 52 ); + EXPECT_EQ( outVector.getElement( 2 ), 84 ); + EXPECT_EQ( outVector.getElement( 3 ), 116 ); + EXPECT_EQ( outVector.getElement( 4 ), 148 ); +} + +template< typename Matrix > +void test_AddMatrix() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ 
+ * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + + Matrix m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m2.setElement( i, j, newValue++ ); + + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + + Matrix mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * 
m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); + EXPECT_EQ( 
mResult.getElement( 2, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 60 ); +} + +template< typename Matrix > +void test_GetMatrixProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType leftRows = 5; + const IndexType leftCols = 4; + + Matrix leftMatrix; + leftMatrix.reset(); + leftMatrix.setDimensions( leftRows, leftCols ); + + RealType value = 1; + for( IndexType i = 0; i < leftRows; i++ ) + for( IndexType j = 0; j < leftCols; j++) + leftMatrix.setElement( i, j, value++ ); + +/* + * Sets up the following 4x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * \ 16 17 18 19 20 / + */ + const IndexType rightRows = 4; + const IndexType rightCols = 5; + + Matrix rightMatrix; + rightMatrix.reset(); + rightMatrix.setDimensions( rightRows, rightCols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rightRows; i++ ) + for( IndexType j = 0; j < rightCols; j++) + rightMatrix.setElement( i, j, newValue++ ); + +/* + * Sets up the following 5x5 resulting dense matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + Matrix mResult; + mResult.reset(); + mResult.setDimensions( leftRows, rightCols ); + mResult.setValue( 0 ); + + RealType leftMatrixMultiplicator = 1; + RealType rightMatrixMultiplicator = 2; +/* + * / 1 2 3 4 \ / 220 240 260 280 300 \ + * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | + 
* 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 10 | = | 764 848 932 1016 1100 | + * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | + * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / + */ + + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); + EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); + EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); + EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); + EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 ); + EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 ); +} + +template< typename Matrix > +void test_GetTransposition() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 3x2 dense matrix: + * + * / 1 2 \ + * | 3 4 | + * \ 5 6 / + */ + const IndexType rows = 3; + const IndexType cols = 2; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( 
IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.print( std::cout ); + +/* + * Sets up the following 2x3 dense matrix: + * + * / 0 0 0 \ + * \ 0 0 0 / + */ + Matrix mTransposed; + mTransposed.reset(); + mTransposed.setDimensions( cols, rows ); + + mTransposed.print( std::cout ); + + RealType matrixMultiplicator = 1; + + mTransposed.getTransposition( m, matrixMultiplicator ); + + mTransposed.print( std::cout ); + +/* + * Should result in the following 2x3 dense matrix: + * + * / 1 3 5 \ + * \ 2 4 6 / + */ + + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); + EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); + EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); + + EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); + EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); + EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); +} + + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 4x4 dense matrix: + * + * / 4 1 1 1 \ + * | 1 4 1 1 | + * | 1 1 4 1 | + * \ 1 1 1 4 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0 ); + m.setElement( 0, 2, 1.0 ); + m.setElement( 0, 3, 1.0 ); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + m.setElement( 1, 3, 1.0 ); + + m.setElement( 2, 0, 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 0, 1.0 ); // 3rd row + m.setElement( 3, 1, 1.0 ); + m.setElement( 3, 2, 1.0 ); + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + + IndexType row = 0; + RealType omega = 1; + + 
m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 0.3671875 ); +} + +template< typename Matrix > +void test_AssignmentOperator() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < 
rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + + +template< typename Matrix > +void test_SaveAndLoad() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 4x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * \ 13 14 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix savedMatrix; + savedMatrix.reset(); + savedMatrix.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + savedMatrix.setElement( i, j, value++ ); + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + loadedMatrix.reset(); + loadedMatrix.setDimensions( rows, cols ); + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 
loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + + #include + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 
Col:1->2 Col:2->3 Col:3->4\t\n" + "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" + "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" + "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" + "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::Dense< int, TNL::Devices::Host, short >, + TNL::Matrices::Dense< long, TNL::Devices::Host, short >, + TNL::Matrices::Dense< float, TNL::Devices::Host, short >, + TNL::Matrices::Dense< double, TNL::Devices::Host, short >, + TNL::Matrices::Dense< int, TNL::Devices::Host, int >, + TNL::Matrices::Dense< long, TNL::Devices::Host, int >, + TNL::Matrices::Dense< float, TNL::Devices::Host, int >, + TNL::Matrices::Dense< double, TNL::Devices::Host, int >, + TNL::Matrices::Dense< int, TNL::Devices::Host, long >, + TNL::Matrices::Dense< long, TNL::Devices::Host, long >, + TNL::Matrices::Dense< float, TNL::Devices::Host, long >, + TNL::Matrices::Dense< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::Dense< int, TNL::Devices::Cuda, short >, + TNL::Matrices::Dense< long, TNL::Devices::Cuda, short >, + TNL::Matrices::Dense< float, TNL::Devices::Cuda, short >, + TNL::Matrices::Dense< double, TNL::Devices::Cuda, short >, + TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >, + TNL::Matrices::Dense< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Dense< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Dense< int, TNL::Devices::Cuda, long >, + TNL::Matrices::Dense< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Dense< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Dense< double, TNL::Devices::Cuda, long > +#endif +>; + 
+TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, getRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetRowLength< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setValueTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetValue< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addMatrixTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddMatrix< MatrixType >(); +} + +TYPED_TEST( MatrixTest, 
assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >(); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +//// test_getType is not general enough yet. DO NOT TEST IT YET. + +//TEST( DenseMatrixTest, Dense_GetTypeTest_Host ) +//{ +// host_test_GetType< Dense_host_float, Dense_host_int >(); +//} +// +//#ifdef HAVE_CUDA +//TEST( DenseMatrixTest, Dense_GetTypeTest_Cuda ) +//{ +// cuda_test_GetType< Dense_cuda_float, Dense_cuda_int >(); +//} +//#endif + +/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(609): error: no instance of function template \"TNL::Matrices::DenseMatrixProductKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, Dense_host_int *, const int, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Dense::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense::RealType &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Dense_host_int, Matrix2=Dense_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Dense_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1315): here\n\n"; +} + 
+#ifdef HAVE_CUDA +TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::DenseMatrixProductKernel(TNL::Matrices::Dense *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; + std::cout << " instantiation of \"void TNL::Matrices::Dense::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense::RealType &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Dense_cuda_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1332): here\n\n"; +} +#endif + +TEST( DenseMatrixTest, Dense_getTranspositionTest_Host ) +{ +// test_GetTransposition< Dense_host_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(836): error: no instance of function template \"TNL::Matrices::DenseTranspositionAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Dense::getTransposition(const Matrix &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Dense_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n"; + std::cout << "AND this message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(852): error: no instance of function template \"TNL::Matrices::DenseTranspositionNonAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Dense::getTransposition(const Matrix &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Dense_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixTest, Dense_getTranspositionTest_Cuda ) +{ +// test_GetTransposition< 
Dense_cuda_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test throws the following message: \n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! 
\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n"; + std::cout << " what(): CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n"; + std::cout << " Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n"; + std::cout << " [1] 4003 abort (core dumped) ./DenseMatrixTest-dbg\n"; +} +#endif + +TEST( DenseMatrixTest, Dense_performSORIterationTest_Host ) +{ + test_PerformSORIteration< Dense_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda ) +{ +// test_PerformSORIteration< Dense_cuda_float >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched, this test throws the following message: \n"; + std::cout << " [1] 6992 segmentation fault (core dumped) ./SparseMatrixTest-dbg\n\n"; + std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; +} +#endif + * */ + +#endif // HAVE_GTEST + +#include "../main.h" -- GitLab From e12f893516ec2bc3b08447516306c8a1da7c65cc Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 18:27:21 +0100 Subject: [PATCH 075/179] Added tridiagonal matrix unit tests. 
--- .../Matrices/TridiagonalMatrixTest.cpp | 8 +- .../Matrices/TridiagonalMatrixTest.cu | 8 +- .../Matrices/TridiagonalMatrixTest.h | 191 +++++++++--------- 3 files changed, 102 insertions(+), 105 deletions(-) diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp index a56349360..3248d3526 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp @@ -1,11 +1,11 @@ /*************************************************************************** - DenseMatrixTest.cpp - description + TridiagonalMatrixTest.cpp - description ------------------- - begin : Nov 10, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include "DenseMatrixTest.h" \ No newline at end of file +#include "TridiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu index 11d45efdb..16f909fa7 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu @@ -1,11 +1,11 @@ /*************************************************************************** - DenseMatrixTest.cu - description + TridiagonalMatrixTest.cu - description ------------------- - begin : Nov 10, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include "DenseMatrixTest.h" \ No newline at end of file +#include "TridiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index 8c3132caf..40cecb2bd 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -1,8 +1,8 @@ /*************************************************************************** - DenseMatrixTest.h - description + TridiagonalMatrixTest.h - description ------------------- - begin : Nov 10, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -18,37 +18,29 @@ #include #include -using Dense_host_float = TNL::Matrices::Dense< float, TNL::Devices::Host, int >; -using Dense_host_int = TNL::Matrices::Dense< int, TNL::Devices::Host, int >; +using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >; +using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >; -using Dense_cuda_float = TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >; -using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >; +using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >; +using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >; -static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; +static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl"; #ifdef HAVE_GTEST #include #include -template< typename MatrixHostFloat, typename 
MatrixHostInt > -void host_test_GetType() +void test_GetSerializationType() { - MatrixHostFloat mtrxHostFloat; - MatrixHostInt mtrxHostInt; - - EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) ); - EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) ); -} - -template< typename MatrixCudaFloat, typename MatrixCudaInt > -void cuda_test_GetType() -{ - MatrixCudaFloat mtrxCudaFloat; - MatrixCudaInt mtrxCudaInt; - - EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); - EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, false 
>::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > @@ -1173,11 +1165,11 @@ void test_AssignmentOperator() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; - using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; const IndexType rows( 10 ), columns( 10 ); - DenseHost hostMatrix( rows, columns ); + TridiagonalHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) hostMatrix( i, j ) = i + j; @@ -1195,7 +1187,7 @@ void test_AssignmentOperator() } #ifdef HAVE_CUDA - DenseCuda cudaMatrix( rows, columns ); + TridiagonalCuda cudaMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) cudaMatrix.setElement( i, j, i + j ); @@ -1347,36 +1339,41 @@ protected: // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types < - TNL::Matrices::Dense< int, TNL::Devices::Host, short >, - TNL::Matrices::Dense< long, TNL::Devices::Host, short >, - TNL::Matrices::Dense< float, TNL::Devices::Host, short >, - TNL::Matrices::Dense< double, TNL::Devices::Host, short >, - TNL::Matrices::Dense< int, TNL::Devices::Host, int >, - TNL::Matrices::Dense< long, TNL::Devices::Host, int >, - TNL::Matrices::Dense< float, TNL::Devices::Host, int >, - TNL::Matrices::Dense< double, TNL::Devices::Host, int >, - TNL::Matrices::Dense< int, 
TNL::Devices::Host, long >, - TNL::Matrices::Dense< long, TNL::Devices::Host, long >, - TNL::Matrices::Dense< float, TNL::Devices::Host, long >, - TNL::Matrices::Dense< double, TNL::Devices::Host, long > + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, long >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, long >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, long >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::Dense< int, TNL::Devices::Cuda, short >, - TNL::Matrices::Dense< long, TNL::Devices::Cuda, short >, - TNL::Matrices::Dense< float, TNL::Devices::Cuda, short >, - TNL::Matrices::Dense< double, TNL::Devices::Cuda, short >, - TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >, - TNL::Matrices::Dense< long, TNL::Devices::Cuda, int >, - TNL::Matrices::Dense< float, TNL::Devices::Cuda, int >, - TNL::Matrices::Dense< double, TNL::Devices::Cuda, int >, - TNL::Matrices::Dense< int, TNL::Devices::Cuda, long >, - TNL::Matrices::Dense< long, TNL::Devices::Cuda, long >, - TNL::Matrices::Dense< float, TNL::Devices::Cuda, long >, - TNL::Matrices::Dense< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< int, 
TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long > #endif >; TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); +TYPED_TEST( Matrix, getSerializationType ) +{ + test_GetSerializationType(); +} + TYPED_TEST( MatrixTest, setDimensionsTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1491,114 +1488,114 @@ TYPED_TEST( MatrixTest, printTest ) //// test_getType is not general enough yet. DO NOT TEST IT YET. -//TEST( DenseMatrixTest, Dense_GetTypeTest_Host ) +//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Host ) //{ -// host_test_GetType< Dense_host_float, Dense_host_int >(); +// host_test_GetType< Tridiagonal_host_float, Tridiagonal_host_int >(); //} // //#ifdef HAVE_CUDA -//TEST( DenseMatrixTest, Dense_GetTypeTest_Cuda ) +//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Cuda ) //{ -// cuda_test_GetType< Dense_cuda_float, Dense_cuda_int >(); +// cuda_test_GetType< Tridiagonal_cuda_float, Tridiagonal_cuda_int >(); //} //#endif -/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) +/*TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Host ) { bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; - std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(609): error: no instance of function template \"TNL::Matrices::DenseMatrixProductKernel\" matches the argument list\n"; - std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, Dense_host_int *, const int, const int, int, int)\n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::TridiagonalMatrixProductKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal *, Tridiagonal_host_int *, Tridiagonal_host_int *, const int, const int, int, int)\n"; std::cout << " detected during:\n"; - std::cout << " instantiation of \"void TNL::Matrices::Dense::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense::RealType &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Dense_host_int, Matrix2=Dense_host_int, tileDim=32]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n"; - std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Dense_host_int]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1315): here\n\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal::RealType &, const TNL::Matrices::Tridiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Tridiagonal_host_int, Matrix2=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " 
/home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1315): here\n\n"; } #ifdef HAVE_CUDA -TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda ) +TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Cuda ) { bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; std::cout << "If launched on GPU, this test will not build, but will print the following message: \n"; - std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; std::cout << " detected during:\n"; - std::cout << " instantiation of \"void TNL::Matrices::DenseMatrixProductKernel(TNL::Matrices::Dense *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; - std::cout << " instantiation of \"void TNL::Matrices::Dense::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Dense::RealType &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Dense_cuda_int, Matrix2=Dense_cuda_int, tileDim=32]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(901): here\n"; - std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Dense_cuda_int]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1332): here\n\n"; + std::cout << " instantiation of \"void TNL::Matrices::TridiagonalMatrixProductKernel(TNL::Matrices::Tridiagonal *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal::getMatrixProduct(const 
Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal::RealType &, const TNL::Matrices::Tridiagonal::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Tridiagonal_cuda_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1332): here\n\n"; } #endif -TEST( DenseMatrixTest, Dense_getTranspositionTest_Host ) +TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Host ) { -// test_GetTransposition< Dense_host_int >(); +// test_GetTransposition< Tridiagonal_host_int >(); bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; - std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(836): error: no instance of function template \"TNL::Matrices::DenseTranspositionAlignedKernel\" matches the argument list\n"; - std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, const int, int, int)\n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal *, Tridiagonal_host_int *, const int, int, int)\n"; std::cout << " detected during:\n"; - std::cout << " instantiation of \"void TNL::Matrices::Dense::getTransposition(const Matrix &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n"; - std::cout << " 
instantiation of \"void test_GetTransposition() [with Matrix=Dense_host_int]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n"; std::cout << "AND this message: \n"; - std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h(852): error: no instance of function template \"TNL::Matrices::DenseTranspositionNonAlignedKernel\" matches the argument list\n"; - std::cout << " argument types are: (TNL::Matrices::Dense *, Dense_host_int *, const int, int, int)\n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionNonAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal *, Tridiagonal_host_int *, const int, int, int)\n"; std::cout << " detected during:\n"; - std::cout << " instantiation of \"void TNL::Matrices::Dense::getTransposition(const Matrix &, const TNL::Matrices::Dense::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Dense_host_int, tileDim=32]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(977): here\n"; - std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Dense_host_int]\"\n"; - std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/DenseMatrixTest.h(1420): here\n\n"; + std::cout << " instantiation of \"void 
TNL::Matrices::Tridiagonal::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n"; } #ifdef HAVE_CUDA -TEST( DenseMatrixTest, Dense_getTranspositionTest_Cuda ) +TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Cuda ) { -// test_GetTransposition< Dense_cuda_int >(); +// test_GetTransposition< Tridiagonal_cuda_int >(); bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; std::cout << "If launched on GPU, this test throws the following message: \n"; std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n"; - std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; std::cout << " Line: 329 \n"; std::cout << " Diagnostics: Not supported with CUDA.\n"; std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; - std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; std::cout << " Line: 329 \n"; std::cout << " Diagnostics: Not supported with CUDA.\n"; std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! 
\n"; - std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; std::cout << " Line: 329 \n"; std::cout << " Diagnostics: Not supported with CUDA.\n"; std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; - std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Dense_impl.h \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; std::cout << " Line: 329 \n"; std::cout << " Diagnostics: Not supported with CUDA.\n"; std::cout << " terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n"; std::cout << " what(): CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n"; std::cout << " Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n"; - std::cout << " [1] 4003 abort (core dumped) ./DenseMatrixTest-dbg\n"; + std::cout << " [1] 4003 abort (core dumped) ./TridiagonalMatrixTest-dbg\n"; } #endif -TEST( DenseMatrixTest, Dense_performSORIterationTest_Host ) +TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Host ) { - test_PerformSORIteration< Dense_host_float >(); + test_PerformSORIteration< Tridiagonal_host_float >(); } #ifdef HAVE_CUDA -TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda ) +TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Cuda ) { -// test_PerformSORIteration< Dense_cuda_float >(); +// test_PerformSORIteration< Tridiagonal_cuda_float >(); bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -- GitLab From 72972cfed69a49860bcd71d94fbe34a1dcefde33 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 18:27:50 +0100 Subject: [PATCH 076/179] Added TridiagonalMatrixView. 
--- src/TNL/Matrices/TridiagonalMatrixView.h | 209 ++++++ src/TNL/Matrices/TridiagonalMatrixView.hpp | 759 +++++++++++++++++++++ 2 files changed, 968 insertions(+) create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.h create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.hpp diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h new file mode 100644 index 000000000..3f57fe1c3 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -0,0 +1,209 @@ +/*************************************************************************** + Tridiagonal.h - description + ------------------- + begin : Nov 30, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Device > +class TridiagonalDeviceDependentCode; + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int > +class Tridiagonal : public Matrix< Real, Device, Index > +{ +private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; + + // friend class will be needed for templated assignment operators + template< typename Real2, typename Device2, typename Index2 > + friend class Tridiagonal; + +public: + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef Matrix< Real, Device, Index > BaseType; + typedef TridiagonalRow< Real, Index > MatrixRow; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; + + Tridiagonal(); + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + void setDimensions( const IndexType rows, + const IndexType columns ); + + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + + IndexType getRowLength( const IndexType row ) const; + + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + template< typename Real2, typename Device2, typename Index2 > + void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + + IndexType getNumberOfMatrixElements() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + IndexType getMaxRowlength() const; + + void reset(); + + template< typename Real2, typename Device2, typename Index2 > + bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + + template< typename Real2, typename Device2, typename Index2 > + bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + + void setValue( const RealType& v ); + + __cuda_callable__ + bool setElementFast( const IndexType row, + const IndexType column, + const RealType& value ); + + bool setElement( 
const IndexType row, + const IndexType column, + const RealType& value ); + + __cuda_callable__ + bool addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + __cuda_callable__ + bool setRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements ); + + bool setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements ); + + __cuda_callable__ + bool addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements, + const RealType& thisRowMultiplicator = 1.0 ); + + bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements, + const RealType& thisRowMultiplicator = 1.0 ); + + __cuda_callable__ + RealType getElementFast( const IndexType row, + const IndexType column ) const; + + RealType getElement( const IndexType row, + const IndexType column ) const; + + __cuda_callable__ + void getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const; + + __cuda_callable__ + MatrixRow getRow( const IndexType rowIndex ); + + __cuda_callable__ + const MatrixRow getRow( const IndexType rowIndex ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real2, typename Index2 > + void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, 
typename Index2 > + void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + // copy assignment + Tridiagonal& operator=( const Tridiagonal& matrix ); + + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + + void save( File& file ) const; + + void load( File& file ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void print( std::ostream& str ) const; + +protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; + + Containers::Vector< RealType, DeviceType, IndexType > values; + + typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class TridiagonalDeviceDependentCode< DeviceType >; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp new file mode 100644 index 000000000..2752f6850 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -0,0 +1,759 @@ +/*************************************************************************** + Tridiagonal_impl.h - description + ------------------- + begin : Nov 30, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Device > +class TridiagonalDeviceDependentCode; + +template< 
typename Real, + typename Device, + typename Index > +Tridiagonal< Real, Device, Index >::Tridiagonal() +{ +} + +template< typename Real, + typename Device, + typename Index > +String Tridiagonal< Real, Device, Index >::getType() +{ + return String( "Matrices::Tridiagonal< " ) + + String( TNL::getType< Real >() ) + + String( ", " ) + + String( Device :: getDeviceType() ) + + String( ", " ) + + String( TNL::getType< Index >() ) + + String( " >" ); +} + +template< typename Real, + typename Device, + typename Index > +String Tridiagonal< Real, Device, Index >::getTypeVirtual() const +{ + return this->getType(); +} + +template< typename Real, + typename Device, + typename Index > +String Tridiagonal< Real, Device, Index >::getSerializationType() +{ + return String( "Matrices::Tridiagonal< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; +} + +template< typename Real, + typename Device, + typename Index > +String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows, + const IndexType columns ) +{ + Matrix< Real, Device, Index >::setDimensions( rows, columns ); + values.setSize( 3*min( rows, columns ) ); + this->values.setValue( 0.0 ); +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) +{ + if( rowLengths[ 0 ] > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); + for( Index i = 1; i < diagonalLength-1; i++ ) + if( rowLengths[ i ] > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); + if( this->getRows() > this->getColumns() ) + if( rowLengths[ this->getRows()-1 ] > 1 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() == this->getColumns() ) + if( rowLengths[ this->getRows()-1 ] > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() < this->getColumns() ) + if( rowLengths[ this->getRows()-1 ] > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); +} + +template< typename Real, + typename Device, + typename Index > +Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const +{ + return this->getRowLengthFast( row ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const +{ + const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); + if( row == 0 ) + return 2; + if( row > 0 && row < diagonalLength - 1 ) + return 3; + if( this->getRows() > this->getColumns() ) + return 1; + if( this->getRows() == this->getColumns() ) + return 2; + return 3; +} + +template< typename Real, + typename Device, + typename Index > +Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const +{ + return 3; +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Device2, typename Index2 > +void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) +{ + this->setDimensions( m.getRows(), m.getColumns() ); +} + +template< typename Real, + typename Device, + typename Index > +Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const +{ + return 3 * min( this->getRows(), this->getColumns() ); +} + +template< typename Real, + typename Device, + typename Index > +Index Tridiagonal< Real, Device, Index > :: 
getNumberOfNonzeroMatrixElements() const +{ + IndexType nonzeroElements = 0; + for( IndexType i = 0; i < this->values.getSize(); i++ ) + if( this->values.getElement( i ) != 0 ) + nonzeroElements++; + return nonzeroElements; +} + +template< typename Real, + typename Device, + typename Index > +Index +Tridiagonal< Real, Device, Index >:: +getMaxRowlength() const +{ + return 3; +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::reset() +{ + Matrix< Real, Device, Index >::reset(); + this->values.reset(); +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Device2, typename Index2 > +bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const +{ + return this->values == matrix.values; +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Device2, typename Index2 > +bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const +{ + return this->values != matrix.values; +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::setValue( const RealType& v ) +{ + this->values.setValue( v ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->values[ this->getElementIndex( row, column ) ] = value; + return true; +} + +template< typename Real, + typename Device, + typename Index > +bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->values.setElement( this->getElementIndex( row, column ), value ); + return true; +} + +template< typename Real, + typename Device, + typename Index > 
+__cuda_callable__ +bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + const Index i = this->getElementIndex( row, column ); + this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; + return true; +} + +template< typename Real, + typename Device, + typename Index > +bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + const Index i = this->getElementIndex( row, column ); + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); + return true; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements ) +{ + TNL_ASSERT( elements <= this->columns, + std::cerr << " elements = " << elements + << " this->columns = " << this->columns ); + return this->addRowFast( row, columns, values, elements, 0.0 ); +} + +template< typename Real, + typename Device, + typename Index > +bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements ) +{ + TNL_ASSERT( elements <= this->columns, + std::cerr << " elements = " << elements + << " this->columns = " << this->columns ); + return this->addRow( row, columns, values, elements, 0.0 ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements, + const RealType& thisRowMultiplicator ) +{ + TNL_ASSERT( elements <= this->columns, + std::cerr << " elements = " << elements + << " 
this->columns = " << this->columns ); + if( elements > 3 ) + return false; + for( IndexType i = 0; i < elements; i++ ) + { + const IndexType& column = columns[ i ]; + if( column < row - 1 || column > row + 1 ) + return false; + addElementFast( row, column, values[ i ], thisRowMultiplicator ); + } + return true; +} + +template< typename Real, + typename Device, + typename Index > +bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements, + const RealType& thisRowMultiplicator ) +{ + TNL_ASSERT( elements <= this->columns, + std::cerr << " elements = " << elements + << " this->columns = " << this->columns ); + if( elements > 3 ) + return false; + for( IndexType i = 0; i < elements; i++ ) + { + const IndexType column = columns[ i ]; + if( column < row - 1 || column > row + 1 ) + return false; + addElement( row, column, values[ i ], thisRowMultiplicator ); + } + return true; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row, + const IndexType column ) const +{ + if( abs( column - row ) > 1 ) + return 0.0; + return this->values[ this->getElementIndex( row, column ) ]; +} + +template< typename Real, + typename Device, + typename Index > +Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row, + const IndexType column ) const +{ + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const +{ + IndexType elementPointer( 0 ); + for( IndexType i = -1; i <= 1; i++ ) + { + const IndexType column = row + 1; + if( column >= 0 && column < this->getColumns() ) + { + columns[ elementPointer ] 
= column; + values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; + elementPointer++; + } + } +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +typename Tridiagonal< Real, Device, Index >::MatrixRow +Tridiagonal< Real, Device, Index >:: +getRow( const IndexType rowIndex ) +{ + if( std::is_same< Device, Devices::Host >::value ) + return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], + rowIndex, + this->getColumns(), + 1 ); + if( std::is_same< Device, Devices::Cuda >::value ) + return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], + rowIndex, + this->getColumns(), + this->rows ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +const typename Tridiagonal< Real, Device, Index >::MatrixRow +Tridiagonal< Real, Device, Index >:: +getRow( const IndexType rowIndex ) const +{ + throw Exceptions::NotImplementedError(); +} + + +template< typename Real, + typename Device, + typename Index > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + return TridiagonalDeviceDependentCode< Device >:: + rowVectorProduct( this->rows, + this->values, + row, + vector ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename InVector, + typename OutVector > +void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector, + OutVector& outVector ) const +{ + TNL_ASSERT( this->getColumns() == inVector.getSize(), + std::cerr << "Matrix columns: " << this->getColumns() << std::endl + << "Vector size: " << inVector.getSize() << std::endl ); + TNL_ASSERT( this->getRows() == outVector.getSize(), + std::cerr << "Matrix rows: " << this->getRows() << std::endl + << "Vector size: " << outVector.getSize() << std::endl ); + + 
DeviceDependentCode::vectorProduct( *this, inVector, outVector ); +} + +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Index2 > +void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl ); + + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.values; + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Tridiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +} +#endif + +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Index2 > +void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << 
"This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + Tridiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE; +#endif + } +} + +template< typename Real, + typename Device, + typename Index > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +// 
copy assignment +template< typename Real, + typename Device, + typename Index > +Tridiagonal< Real, Device, Index >& +Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) +{ + this->setLike( matrix ); + this->values = matrix.values; + return *this; +} + +// cross-device copy assignment +template< typename Real, + typename Device, + typename Index > + template< typename Real2, typename Device2, typename Index2, typename > +Tridiagonal< Real, Device, Index >& +Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) +{ + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, + "unknown device" ); + static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, + "unknown device" ); + + this->setLike( matrix ); + + throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet."); +} + + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::save( File& file ) const +{ + Matrix< Real, Device, Index >::save( file ); + file << this->values; +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::load( File& file ) +{ + Matrix< Real, Device, Index >::load( file ); + file >> this->values; +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index > +void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " 
<< row << " -> "; + for( IndexType column = row - 1; column < row + 2; column++ ) + if( column >= 0 && column < this->columns ) + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row, + const IndexType column ) const +{ + TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows, + std::cerr << " this->rows = " << this->rows + << " row = " << row << " column = " << column ); + TNL_ASSERT( abs( row - column ) < 2, + std::cerr << "row = " << row << " column = " << column << std::endl ); + return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column ); +} + +template<> +class TridiagonalDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return 2*row + column; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ 1 ]; + Index i = 3 * row; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ]; + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ] + + vector[ row + 1 ] * values[ i + 1 ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) 
+#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class TridiagonalDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return ( column - row + 1 )*rows + row - 1; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ rows - 1 ]; + Index i = row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ]; + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ] + + vector[ row + 1 ] * values[ i + 2*rows ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + +} // namespace Matrices +} // namespace TNL -- GitLab From 97888bf1ec2d2f97c67a9ff25d92b1957a66d526 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 18:28:18 +0100 Subject: [PATCH 077/179] Added tridiagonal matrix unit tests to CMakeLists.txt. 
--- src/UnitTests/Matrices/CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index e4616f23b..333dee952 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -10,8 +10,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - #### - # Segments tests + CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -34,8 +35,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - #### - # Segments tests + ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -53,12 +56,9 @@ ENDIF( BUILD_CUDA ) ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -# TODO: Uncomment the following when AdEllpack works -#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( 
DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -#### -# Segments tests ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) -- GitLab From b3d86f895ee6e994909b719a08705b0673f84011 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 8 Jan 2020 18:28:57 +0100 Subject: [PATCH 078/179] Updating API of tridiagonal matrix. --- src/TNL/Matrices/Tridiagonal.h | 246 ++++++++-------- src/TNL/Matrices/Tridiagonal.hpp | 484 ++++++++++++++++--------------- 2 files changed, 366 insertions(+), 364 deletions(-) diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 3f57fe1c3..f80bc4c18 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -13,197 +13,179 @@ #include #include #include +#include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class TridiagonalDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, - typename Index = int > -class Tridiagonal : public Matrix< Real, Device, Index > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > +class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Tridiagonal; + // friend class will be needed for templated assignment operators + template< typename Real2, typename Device2, typename Index2 > + friend class Tridiagonal; -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef TridiagonalRow< Real, Index > MatrixRow; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Tridiagonal< _Real, _Device, _Index >; - Tridiagonal(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; - static String getSerializationType(); + Tridiagonal(); - virtual String getSerializationTypeVirtual() const; 
+ Tridiagonal( const IndexType rows, const IndexType columns ); - void setDimensions( const IndexType rows, - const IndexType columns ); + ViewType getView(); - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + ConstViewType getConstView() const; - IndexType getRowLength( const IndexType row ) const; + static String getSerializationType(); - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; + virtual String getSerializationTypeVirtual() const; - IndexType getMaxRowLength() const; + void setDimensions( const IndexType rows, + const IndexType columns ); - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - IndexType getNumberOfMatrixElements() const; + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getNumberOfNonzeroMatrixElements() const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowlength() const; + IndexType getMaxRowLength() const; - void reset(); + template< typename Real2, typename Device2, typename Index2 > + void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); - template< typename Real2, typename Device2, typename Index2 > - bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getNumberOfMatrixElements() const; - template< typename Real2, typename Device2, typename Index2 > - bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getNumberOfNonzeroMatrixElements() const; - void setValue( const RealType& v ); + IndexType getMaxRowlength() const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); + void reset(); - bool setElement( const IndexType row, - const IndexType column, - const RealType& 
value ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const; - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + void setValue( const RealType& v ); - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + RealType getElement( const IndexType row, + const IndexType column ) const; - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + template< typename Fetch, typename Reduce, 
typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - RealType getElement( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + template< typename Function > + void forAllRows( Function& function ) const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + template< typename Function > + void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + template< typename Real2, typename Index2 > + void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - template< typename Real2, typename Index2 > - void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); + 
template< typename Real2, typename Index2 > + void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - // copy assignment - Tridiagonal& operator=( const Tridiagonal& matrix ); + // copy assignment + Tridiagonal& operator=( const Tridiagonal& matrix ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); - void save( File& file ) const; + void save( File& file ) const; - void load( File& file ); + void load( File& file ); - void save( const String& fileName ) const; + void save( const String& fileName ) const; - void load( const String& fileName ); + void load( const String& fileName ); - void print( std::ostream& str ) const; + void print( std::ostream& str ) const; -protected: + protected: - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; - Containers::Vector< RealType, DeviceType, IndexType > values; + Containers::Vector< RealType, DeviceType, IndexType > values; - typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class 
TridiagonalDeviceDependentCode< DeviceType >; + typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class TridiagonalDeviceDependentCode< DeviceType >; }; } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 2752f6850..c36edec0b 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -15,74 +15,81 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class TridiagonalDeviceDependentCode; template< typename Real, typename Device, - typename Index > -Tridiagonal< Real, Device, Index >::Tridiagonal() -{ -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal() { - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< Real >() ) + - String( ", " ) + - String( Device :: getDeviceType() ) + - String( ", " ) + - String( TNL::getType< Index >() ) + - String( " >" ); } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal( const IndexType rows, const IndexType columns ) { - return this->getType(); + this->setDimensions( rows, columns ); } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationType() { return String( "Matrices::Tridiagonal< " ) + - getType< RealType >() + ", " + - getType< Device >() 
+ ", " + - getType< IndexType >() + " >"; + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationTypeVirtual() const { return this->getSerializationType(); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); values.setSize( 3*min( rows, columns ) ); - this->values.setValue( 0.0 ); + this->values = 0.0; } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { if( rowLengths[ 0 ] > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); @@ -103,17 +110,12 @@ void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompresse template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const -{ - return this->getRowLengthFast( row ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRowLength( const IndexType row ) const { const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); if( row == 0 ) @@ -129,46 +131,64 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getMaxRowLength() const { return 3; } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Device2, typename Index2 > -void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) { this->setDimensions( m.getRows(), m.getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getNumberOfMatrixElements() const { return 3 * min( this->getRows(), 
this->getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getNumberOfNonzeroMatrixElements() const { - IndexType nonzeroElements = 0; - for( IndexType i = 0; i < this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0 ) - nonzeroElements++; - return nonzeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > Index -Tridiagonal< Real, Device, Index >:: +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getMaxRowlength() const { return 3; @@ -176,8 +196,12 @@ getMaxRowlength() const template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::reset() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +reset() { Matrix< Real, Device, Index >::reset(); this->values.reset(); @@ -185,48 +209,55 @@ void Tridiagonal< Real, Device, Index >::reset() template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values == matrix.values; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index 
>::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values != matrix.values; + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setValue( const RealType& v ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const { - this->values.setValue( v ); + return ! 
this->operator==( matrix ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setValue( const RealType& v ) { - this->values[ this->getElementIndex( row, column ) ] = value; - return true; + this->values = v; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) { this->values.setElement( this->getElementIndex( row, column ), value ); return true; @@ -234,159 +265,120 @@ bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) { const Index i = this->getElementIndex( row, column ); - this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); return true; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addElement( 
const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, const IndexType column ) const { - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRowFast( row, columns, values, elements, 0.0 ); + } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << 
elements - << " this->columns = " << this->columns ); - return this->addRow( row, columns, values, elements, 0.0 ); -} -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType& column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElementFast( row, column, values[ i ], thisRowMultiplicator ); - } - return true; } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElement( row, column, values[ i ], thisRowMultiplicator ); - } - return true; + } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function 
> +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) { - if( abs( column - row ) > 1 ) - return 0.0; - return this->values[ this->getElementIndex( row, column ) ]; } template< typename Real, typename Device, - typename Index > -Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const { - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) { - IndexType elementPointer( 0 ); - for( IndexType i = -1; i <= 1; i++ ) - { - const IndexType column = row + 1; - if( column >= 0 && column < this->getColumns() ) - { - columns[ elementPointer ] = column; - values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; - elementPointer++; - } - } + } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: +typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) { if( std::is_same< Device, Devices::Host >::value ) @@ -403,10 +395,12 @@ 
getRow( const IndexType rowIndex ) template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -const typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: +const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType rowIndex ) const { throw Exceptions::NotImplementedError(); @@ -415,10 +409,12 @@ getRow( const IndexType rowIndex ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row, +typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, const Vector& vector ) const { return TridiagonalDeviceDependentCode< Device >:: @@ -430,10 +426,12 @@ typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename InVector, typename OutVector > -void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), @@ -448,9 +446,11 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, 
Index2 >& matrix, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -494,9 +494,11 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), @@ -541,10 +543,12 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -561,9 +565,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, // copy assignment template< typename Real, typename Device, - typename Index > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ 
-573,10 +579,12 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) // cross-device copy assignment template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Real2, typename Device2, typename Index2, typename > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -591,8 +599,10 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2 template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( File& file ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); file << this->values; @@ -600,8 +610,10 @@ void Tridiagonal< Real, Device, Index >::save( File& file ) const template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( File& file ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); file >> this->values; @@ -609,24 +621,30 @@ void Tridiagonal< Real, Device, Index >::load( File& file ) template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void 
Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const { Object::save( fileName ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( const String& fileName ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -640,9 +658,11 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row, +Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, const IndexType column ) const { TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows, @@ -694,7 +714,7 @@ class TridiagonalDeviceDependentCode< Devices::Host > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { @@ -710,7 +730,7 @@ template<> class TridiagonalDeviceDependentCode< Devices::Cuda > { public: - + typedef Devices::Cuda Device; template< typename Index > @@ -747,7 +767,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda > typename Index, 
typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { -- GitLab From 9a565126aca84cb100eb7d19b2274b6ff10b2ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 9 Jan 2020 22:11:58 +0100 Subject: [PATCH 079/179] Reimplementing tridiagonal matrix. --- src/TNL/Matrices/SparseMatrix.hpp | 1 - src/TNL/Matrices/Tridiagonal.h | 62 +- src/TNL/Matrices/Tridiagonal.hpp | 303 +++++-- src/TNL/Matrices/TridiagonalMatrixRowView.h | 59 ++ src/TNL/Matrices/TridiagonalMatrixRowView.hpp | 75 ++ src/TNL/Matrices/TridiagonalMatrixView.h | 255 +++--- src/TNL/Matrices/TridiagonalMatrixView.hpp | 798 ++++++++---------- src/TNL/Matrices/TridiagonalRow.h | 51 -- src/TNL/Matrices/TridiagonalRow_impl.h | 78 -- .../details/TridiagonalMatrixIndexer.h | 90 ++ .../Matrices/TridiagonalMatrixTest.h | 6 +- 11 files changed, 964 insertions(+), 814 deletions(-) create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.h create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.hpp delete mode 100644 src/TNL/Matrices/TridiagonalRow.h delete mode 100644 src/TNL/Matrices/TridiagonalRow_impl.h create mode 100644 src/TNL/Matrices/details/TridiagonalMatrixIndexer.h diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 8dbe53f4d..6189d43d3 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -515,7 +515,6 @@ forRows( IndexType first, IndexType last, Function& function ) return true; }; this->segments.forSegments( first, last, f ); - } template< typename Real, diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index f80bc4c18..51e05c899 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -12,15 +12,14 @@ #include #include 
-#include +#include #include +#include +#include namespace TNL { namespace Matrices { -template< typename Device > -class TridiagonalDeviceDependentCode; - template< typename Real = double, typename Device = Devices::Host, typename Index = int, @@ -28,27 +27,23 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > { - private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Tridiagonal; - public: using RealType = Real; using DeviceType = Device; using IndexType = Index; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; - //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; - //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; - using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >; + using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< 
IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; template< typename _Real = Real, typename _Device = Device, @@ -70,7 +65,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + //template< typename Vector > + void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; @@ -80,8 +76,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexType getMaxRowLength() const; - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); IndexType getNumberOfMatrixElements() const; @@ -91,11 +87,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void reset(); - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, bool 
RowMajorOrder_ > - bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const; + RowView getRow( const IndexType& rowIdx ); + + const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); @@ -139,8 +139,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); @@ -159,9 +159,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > Tridiagonal& operator=( const Tridiagonal& matrix ); // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ); void save( File& file ) const; @@ -177,12 +176,9 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getElementIndex( const IndexType row, - const IndexType column ) const; - - Containers::Vector< RealType, DeviceType, IndexType > values; + const IndexType localIdx ) const; - typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class TridiagonalDeviceDependentCode< DeviceType >; + IndexerType indexer; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Tridiagonal.hpp 
b/src/TNL/Matrices/Tridiagonal.hpp index c36edec0b..a7178f86e 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Tridiagonal_impl.h - description + Tridiagonal.hpp - description ------------------- begin : Nov 30, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -41,6 +41,30 @@ Tridiagonal( const IndexType rows, const IndexType columns ) this->setDimensions( rows, columns ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getView() -> ViewType +{ + return ViewType( this->values.getView(), indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), indexer ); +} + template< typename Real, typename Device, typename Index, @@ -78,7 +102,8 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); - values.setSize( 3*min( rows, columns ) ); + this->indexer.setDimensions( rows, columns ); + this->values.setSize( this->indexer.getStorageSize() ); this->values = 0.0; } @@ -87,24 +112,24 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > + // template< typename Vector > void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) +setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) { - if( rowLengths[ 0 ] > 2 ) + if( max( rowLengths ) > 3 ) + throw std::logic_error( "Too many non-zero 
elements per row in a tri-diagonal matrix." ); + if( rowLengths.getElement( 0 ) > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - for( Index i = 1; i < diagonalLength-1; i++ ) - if( rowLengths[ i ] > 3 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); if( this->getRows() > this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 1 ) + if( rowLengths.getElement( this->getRows()-1 ) > 1 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); if( this->getRows() == this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 2 ) + if( rowLengths.getElement( this->getRows()-1 ) > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); if( this->getRows() < this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 3 ) + if( rowLengths.getElement( this->getRows()-1 ) > 3 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); } @@ -146,10 +171,10 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > - template< typename Real2, typename Device2, typename Index2 > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) +setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) { this->setDimensions( m.getRows(), m.getColumns() ); } @@ -250,6 +275,32 @@ setValue( const RealType& v ) this->values = v; } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return RowView( this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return RowView( this->values.getView(), this->indexer ); +} + template< typename Real, typename Device, typename Index, @@ -259,6 +310,12 @@ bool Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." 
); this->values.setElement( this->getElementIndex( row, column ), value ); return true; } @@ -275,6 +332,12 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." ); const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); return true; @@ -289,6 +352,11 @@ Real Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( column - row ) > 1 ) return 0.0; return this->values.getElement( this->getElementIndex( row, column ) ); @@ -304,7 +372,46 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + const auto size = this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) + { + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ), + fetch( 0, 1, i_1, values_view[ i_1 ] ) ) ); + return; + } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + 
IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + + keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), + fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ), + fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), + fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) ); + } + else + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, @@ -317,7 +424,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -330,7 +437,45 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { - + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + const auto size = this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) + { + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + function( 0, 1, rowIdx, values_view[ i_0 ] ); + function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); + return; + } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + function( rowIdx, 0, rowIdx - 
1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + } + else + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, @@ -343,6 +488,45 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + const auto size = this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) + { + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + function( 0, 1, rowIdx, values_view[ i_0 ] ); + function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); + return; + } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + } + else + { + IndexType i_0 = 
indexer.getGlobalIndex( rowIdx, 0 ); + function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, @@ -355,7 +539,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) const { - + this->forRows( 0, this->getRows(), function ); } template< typename Real, @@ -368,45 +552,9 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) { - -} - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getRow( const IndexType rowIndex ) -{ - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - this->rows ); + this->forRows( 0, this->getRows(), function ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -__cuda_callable__ -const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getRow( const IndexType rowIndex ) const -{ - throw Exceptions::NotImplementedError(); -} - - template< typename Real, typename Device, typename Index, @@ -414,8 +562,9 @@ template< typename Real, typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, - const 
Vector& vector ) const +typename Vector::RealType +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const { return TridiagonalDeviceDependentCode< Device >:: rowVectorProduct( this->rows, @@ -431,8 +580,9 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl @@ -441,7 +591,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorPro std::cerr << "Matrix rows: " << this->getRows() << std::endl << "Vector size: " << outVector.getSize() << std::endl ); - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, @@ -449,10 +599,12 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > - template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator, - const RealType& thisMatrixMultiplicator ) + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), std::cerr << "This matrix 
columns: " << this->getColumns() << std::endl @@ -582,13 +734,14 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > - template< typename Real2, typename Device2, typename Index2, typename > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, + static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, "unknown device" ); this->setLike( matrix ); @@ -605,7 +758,6 @@ template< typename Real, void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); - file << this->values; } template< typename Real, @@ -616,7 +768,7 @@ template< typename Real, void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); - file >> this->values; + this->indexer.setDimensions( this->getRows(), this->getColumns() ); } template< typename Real, @@ -662,17 +814,17 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator > __cuda_callable__ -Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, - const IndexType column ) const +Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElementIndex( const 
IndexType row, const IndexType localIdx ) const { - TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows, - std::cerr << " this->rows = " << this->rows - << " row = " << row << " column = " << column ); - TNL_ASSERT( abs( row - column ) < 2, - std::cerr << "row = " << row << " column = " << column << std::endl ); - return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column ); + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + return this->indexer.getGlobalIndex( row, localIdx ); } +/* template<> class TridiagonalDeviceDependentCode< Devices::Host > { @@ -774,6 +926,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda > MatrixVectorProductCuda( matrix, inVector, outVector ); } }; + */ } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h new file mode 100644 index 000000000..e77d826e0 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h @@ -0,0 +1,59 @@ +/*************************************************************************** + TridiagonalMatrixRowView.h - description + ------------------- + begin : Dec 31, 2014 + copyright : (C) 2014 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, + typename Indexer > +class TridiagonalMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using IndexType = typename ValuesView::IndexType; + using ValuesViewType = ValuesView; + using IndexerType = Indexer; + + __cuda_callable__ + TridiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ); + + __cuda_callable__ + IndexType 
getSize() const; + + __cuda_callable__ + const IndexType getColumnIndex( const IndexType localIdx ) const; + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const RealType& value ); + protected: + + IndexType rowIdx; + + ValuesViewType values; + + Indexer indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp new file mode 100644 index 000000000..ba60876b9 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp @@ -0,0 +1,75 @@ +/*************************************************************************** + TridiagonalMatrixRowView.hpp - description + ------------------- + begin : Dec 31, 2014 + copyright : (C) 2014 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +TridiagonalMatrixRowView< ValuesView, Indexer >:: +TridiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ) +: rowIdx( rowIdx ), values( values ), indexer( indexer ) +{ +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getSize() const -> IndexType +{ + return indexer.getRowSize(); +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getColumnIndex( const IndexType localIdx ) const -> const IndexType +{ + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + return rowIdx + localIdx - 1; +} + +template< typename 
ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) -> RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +void +TridiagonalMatrixRowView< ValuesView, Indexer >:: +setElement( const IndexType localIdx, + const RealType& value ) +{ + this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value; +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 3f57fe1c3..05f7663c9 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -1,8 +1,8 @@ /*************************************************************************** - Tridiagonal.h - description + TridiagonalMatrixView.h - description ------------------- - begin : Nov 30, 2013 - copyright : (C) 2013 by Tomas Oberhuber + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,200 +10,163 @@ #pragma once -#include +#include #include -#include +#include +#include +#include namespace TNL { -namespace Matrices { - -template< typename Device > -class TridiagonalDeviceDependentCode; +namespace Matrices { template< typename Real = double, typename Device = Devices::Host, - typename Index = int > -class Tridiagonal : public Matrix< Real, Device, Index > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class TridiagonalMatrixView : public 
MatrixView< Real, Device, Index > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; - - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Tridiagonal; - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef TridiagonalRow< Real, Index > MatrixRow; - - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Tridiagonal< _Real, _Device, _Index >; - - Tridiagonal(); - - static String getSerializationType(); - - virtual String getSerializationTypeVirtual() const; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using ValuesViewType = typename BaseType::ValuesView; + using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; - void setDimensions( const IndexType rows, - const IndexType columns ); + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > 
CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > + using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; - IndexType getRowLength( const IndexType row ) const; + TridiagonalMatrixView(); - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; + TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ); - IndexType getMaxRowLength() const; + ViewType getView(); - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + ConstViewType getConstView() const; - IndexType getNumberOfMatrixElements() const; + static String getSerializationType(); - IndexType getNumberOfNonzeroMatrixElements() const; + virtual String getSerializationTypeVirtual() const; - IndexType getMaxRowlength() const; + void setDimensions( const IndexType rows, + const IndexType columns ); - void reset(); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - template< typename Real2, typename Device2, typename Index2 > - bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - template< typename Real2, typename Device2, typename Index2 > - bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getMaxRowLength() const; - void setValue( const RealType& v ); + IndexType getNumberOfMatrixElements() const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); + IndexType getNumberOfNonzeroMatrixElements() const; 
- bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + IndexType getMaxRowlength() const; - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + RowView getRow( const IndexType& rowIdx ); - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + const RowView getRow( const IndexType& rowIdx ) const; - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + void setValue( const RealType& v ); - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); - __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - RealType getElement( const IndexType row, - const IndexType column ) 
const; + RealType getElement( const IndexType row, + const IndexType column ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename Function > + void forAllRows( Function& function ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + template< typename Function > + void forAllRows( Function& function ); - template< typename Real2, typename Index2 > - void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void 
performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - // copy assignment - Tridiagonal& operator=( const Tridiagonal& matrix ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + template< typename Real2, typename Index2 > + void getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); - void save( File& file ) const; + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - void load( File& file ); + // copy assignment + TridiagonalMatrixView& operator=( const TridiagonalMatrixView& matrix ); - void save( const String& fileName ) const; + // cross-device copy assignment + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + TridiagonalMatrixView& operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ); - void load( const String& fileName ); + void save( File& file ) const; - void print( std::ostream& str ) const; + void save( const String& fileName ) const; -protected: + void print( std::ostream& str ) const; - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + protected: - Containers::Vector< RealType, 
DeviceType, IndexType > values; + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; - typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class TridiagonalDeviceDependentCode< DeviceType >; + IndexerType indexer; }; } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 2752f6850..ef893295e 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -1,8 +1,8 @@ /*************************************************************************** - Tridiagonal_impl.h - description + TridiagonalMatrixView.hpp - description ------------------- - begin : Nov 30, 2013 - copyright : (C) 2013 by Tomas Oberhuber + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -11,109 +11,85 @@ #pragma once #include -#include +#include #include namespace TNL { -namespace Matrices { - -template< typename Device > -class TridiagonalDeviceDependentCode; - -template< typename Real, - typename Device, - typename Index > -Tridiagonal< Real, Device, Index >::Tridiagonal() -{ -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() -{ - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< Real >() ) + - String( ", " ) + - String( Device :: getDeviceType() ) + - String( ", " ) + - String( TNL::getType< Index >() ) + - String( " >" ); -} +namespace Matrices { template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const + typename Index, + bool RowMajorOrder > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView() { - return this->getType(); } template< 
typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationType() + typename Index, + bool RowMajorOrder > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ) +: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer ) { - return String( "Matrices::Tridiagonal< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; } template< typename Real, typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const + typename Index, + bool RowMajorOrder > +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getView() -> ViewType { - return this->getSerializationType(); + return ViewType( this->values.getView(), indexer ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) + typename Index, + bool RowMajorOrder > +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getConstView() const -> ConstViewType { - Matrix< Real, Device, Index >::setDimensions( rows, columns ); - values.setSize( 3*min( rows, columns ) ); - this->values.setValue( 0.0 ); + return ConstViewType( this->values.getConstView(), indexer ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) + typename Index, + bool RowMajorOrder > +String +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationType() { - if( rowLengths[ 0 ] > 2 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); - const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - for( Index i = 1; i < diagonalLength-1; i++ ) - if( rowLengths[ i ] > 3 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - if( this->getRows() > this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 1 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - if( this->getRows() == this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 2 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - if( this->getRows() < this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 3 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + return String( "Matrices::Tridiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const + typename Index, + bool RowMajorOrder > +String +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const { - return this->getRowLengthFast( row ); + return this->getSerializationType(); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRowLength( const IndexType row ) const { const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); if( row == 0 ) @@ -129,46 +105,47 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row template< typename Real, typename Device, - typename Index > -Index 
Tridiagonal< Real, Device, Index >::getMaxRowLength() const + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getMaxRowLength() const { return 3; } template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) -{ - this->setDimensions( m.getRows(), m.getColumns() ); -} - -template< typename Real, - typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNumberOfMatrixElements() const { return 3 * min( this->getRows(), this->getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNumberOfNonzeroMatrixElements() const { - IndexType nonzeroElements = 0; - for( IndexType i = 0; i < this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0 ) - nonzeroElements++; - return nonzeroElements; + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > Index -Tridiagonal< Real, Device, Index >:: +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getMaxRowlength() const { return 3; @@ -176,84 +153,103 @@ getMaxRowlength() const template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index 
>::reset() -{ - Matrix< Real, Device, Index >::reset(); - this->values.reset(); + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } } template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const { - return this->values == matrix.values; + return ! 
this->operator==( matrix ); } template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setValue( const RealType& v ) { - return this->values != matrix.values; + this->values = v; } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setValue( const RealType& v ) + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView { - this->values.setValue( v ); + return RowView( rowIdx, this->values.getView(), this->indexer ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > __cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView { - this->values[ this->getElementIndex( row, column ) ] = value; - return true; + return RowView( rowIdx, this->values.getView(), this->indexer ); } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) -{ + typename Index, + bool RowMajorOrder > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + throw 
std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." ); this->values.setElement( this->getElementIndex( row, column ), value ); return true; } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ - const Index i = this->getElementIndex( row, column ); - this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ + typename Index, + bool RowMajorOrder > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." 
); const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); return true; @@ -261,180 +257,230 @@ bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row, template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRowFast( row, columns, values, elements, 0.0 ); -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool RowMajorOrder > +Real +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, const IndexType column ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRow( row, columns, values, elements, 0.0 ); -} + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType& column = columns[ i ]; - if( 
column < row - 1 || column > row + 1 ) - return false; - addElementFast( row, column, values[ i ], thisRowMultiplicator ); - } - return true; + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); } template< typename Real, typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElement( row, column, values[ i ], thisRowMultiplicator ); - } - return true; + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + const auto size = this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) + { + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ), + fetch( 0, 1, i_1, values_view[ i_1 ] ) ) ); + return; + } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + + keep( rowIdx, 
reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), + fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ), + fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), + fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) ); + } + else + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - if( abs( column - row ) > 1 ) - return 0.0; - return this->values[ this->getElementIndex( row, column ) ]; + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, typename Device, - typename Index > -Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row, - const IndexType column ) const -{ - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto 
columns = this->getColumns(); + const auto size = this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) + { + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + function( 0, 1, rowIdx, values_view[ i_0 ] ); + function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); + return; + } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + } + else + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const -{ - IndexType elementPointer( 0 ); - for( IndexType i = -1; i <= 1; i++ ) - { - const IndexType column = row + 1; - if( column >= 0 && column < this->getColumns() ) + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + const auto values_view = this->values.getConstView(); + const auto indexer_ = this->indexer; + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + const auto size = 
this->size; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + //bool compute; + if( rowIdx == 0 ) { - columns[ elementPointer ] = column; - values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; - elementPointer++; + IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); + IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); + function( 0, 1, rowIdx, values_view[ i_0 ] ); + function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); + return; } - } + if( rowIdx < size || columns > rows ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); + return; + } + if( rows == columns ) + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); + function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); + function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + } + else + { + IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); + function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: -getRow( const IndexType rowIndex ) + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) const { - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ 
this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - this->rows ); + this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -const typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: -getRow( const IndexType rowIndex ) const + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) { - throw Exceptions::NotImplementedError(); + this->forRows( 0, this->getRows(), function ); } - template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > template< typename Vector > __cuda_callable__ -typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row, - const Vector& vector ) const +typename Vector::RealType +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const { - return TridiagonalDeviceDependentCode< Device >:: - rowVectorProduct( this->rows, - this->values, - row, - vector ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > template< typename InVector, typename OutVector > -void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl @@ -443,16 +489,19 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector std::cerr << "Matrix rows: " << this->getRows() << std::endl << "Vector size: " << outVector.getSize() << std::endl ); - DeviceDependentCode::vectorProduct( 
*this, inVector, outVector ); + //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator, - const RealType& thisMatrixMultiplicator ) + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), std::cerr << "This matrix columns: " << this->getColumns() << std::endl @@ -494,10 +543,13 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator ) +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), std::cerr << "This matrix rows: " << this->getRows() << std::endl @@ -541,13 +593,16 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const 
RealType& omega ) const +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const { RealType sum( 0.0 ); if( row > 0 ) @@ -561,9 +616,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, // copy assignment template< typename Real, typename Device, - typename Index > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) + typename Index, + bool RowMajorOrder > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >& +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const TridiagonalMatrixView& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -573,14 +630,16 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) // cross-device copy assignment template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2, typename > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >& +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, + static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, "unknown device" ); this->setLike( matrix ); @@ -591,42 +650,29 @@ Tridiagonal< Real, 
Device, Index >::operator=( const Tridiagonal< Real2, Device2 template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( File& file ) const -{ - Matrix< Real, Device, Index >::save( file ); - file << this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( File& file ) + typename Index, + bool RowMajorOrder > +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const { - Matrix< Real, Device, Index >::load( file ); - file >> this->values; + MatrixView< Real, Device, Index >::save( file ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +save( const String& fileName ) const { Object::save( fileName ); } template< typename Real, typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( const String& fileName ) -{ - Object::load( fileName ); -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + bool RowMajorOrder > +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -640,120 +686,18 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder > __cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row, - const IndexType column ) const +Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElementIndex( const IndexType row, const IndexType localIdx ) const { - TNL_ASSERT( row >= 0 && column >= 0 && row 
< this->rows && column < this->rows, - std::cerr << " this->rows = " << this->rows - << " row = " << row << " column = " << column ); - TNL_ASSERT( abs( row - column ) < 2, - std::cerr << "row = " << row << " column = " << column << std::endl ); - return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column ); + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + return this->indexer.getGlobalIndex( row, localIdx ); } -template<> -class TridiagonalDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return 2*row + column; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ 1 ]; - Index i = 3 * row; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ]; - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ] + - vector[ row + 1 ] * values[ i + 1 ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class TridiagonalDeviceDependentCode< Devices::Cuda > -{ - public: - - typedef Devices::Cuda Device; - - template< typename Index > - 
__cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return ( column - row + 1 )*rows + row - 1; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ rows - 1 ]; - Index i = row - 1; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ]; - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ] + - vector[ row + 1 ] * values[ i + 2*rows ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalRow.h b/src/TNL/Matrices/TridiagonalRow.h deleted file mode 100644 index 9d06b39e1..000000000 --- a/src/TNL/Matrices/TridiagonalRow.h +++ /dev/null @@ -1,51 +0,0 @@ -/*************************************************************************** - TridiagonalRow.h - description - ------------------- - begin : Dec 31, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -class TridiagonalRow -{ - public: - - __cuda_callable__ - TridiagonalRow(); - - __cuda_callable__ - TridiagonalRow( Real* values, - const Index row, - const Index columns, - const Index step ); - - __cuda_callable__ - void bind( Real* values, - 
const Index row, - const Index columns, - const Index step ); - - __cuda_callable__ - void setElement( const Index& elementIndex, - const Index& column, - const Real& value ); - - protected: - - Real* values; - - Index row, columns, step; -}; - -} // namespace Matrices -} // namespace TNL - -#include diff --git a/src/TNL/Matrices/TridiagonalRow_impl.h b/src/TNL/Matrices/TridiagonalRow_impl.h deleted file mode 100644 index f5b7e842a..000000000 --- a/src/TNL/Matrices/TridiagonalRow_impl.h +++ /dev/null @@ -1,78 +0,0 @@ -/*************************************************************************** - TridiagonalRow_impl.h - description - ------------------- - begin : Dec 31, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -__cuda_callable__ -TridiagonalRow< Real, Index >:: -TridiagonalRow() -: values( 0 ), - row( 0 ), - columns( 0 ), - step( 0 ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -TridiagonalRow< Real, Index >:: -TridiagonalRow( Real* values, - const Index row, - const Index columns, - const Index step ) -: values( values ), - row( row ), - columns( columns ), - step( step ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -void -TridiagonalRow< Real, Index >:: -bind( Real* values, - const Index row, - const Index columns, - const Index step ) -{ - this->values = values; - this->row = row; - this->columns = columns; - this->step = step; -} - -template< typename Real, typename Index > -__cuda_callable__ -void -TridiagonalRow< Real, Index >:: -setElement( const Index& elementIndex, - const Index& column, - const Real& value ) -{ - TNL_ASSERT( this->values, ); - TNL_ASSERT( this->step > 0,); - TNL_ASSERT( column >= 0 && column < this->columns, - std::cerr << 
"column = " << columns << " this->columns = " << this->columns ); - TNL_ASSERT( abs( column - row ) <= 1, - std::cerr << "column = " << column << " row = " << row ); - - /**** - * this->values stores an adress of the diagonal element - */ - this->values[ ( column - row ) * this->step ] = value; -} - -} // namespace Matrices -} // namespace TNL diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h new file mode 100644 index 000000000..2f245c38f --- /dev/null +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -0,0 +1,90 @@ +/*************************************************************************** + TridiagonalMatrixIndexer.h - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Index, + bool RowMajorOrder > +class TridiagonalMatrixIndexer +{ + public: + + using IndexType = Index; + + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + + __cuda_callable__ + TridiagonalMatrixIndexer() + : rows( 0 ), columns( 0 ), size( 0 ){}; + + __cuda_callable__ + TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns ) + : rows( rows ), columns( columns ), size( TNL::min( rows, columns ) ) {}; + + __cuda_callable__ + TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer ) + : rows( indexer.rows ), columns( indexer.columns ), size( indexer.size ) {}; + + void setDimensions( const IndexType& rows, const IndexType& columns ) + { + this->rows = rows; + this->columns = columns; + this->size = min( rows, columns ); + }; + + __cuda_callable__ + IndexType getRowSize( const IndexType rowIdx ) const + { + if( rowIdx == 0 ) + return 2; + if( columns <= rows ) + 
{ + if( rowIdx == columns - 1 ) + return 2; + if( rowIdx == columns ) + return 1; + } + return 3; + }; + + __cuda_callable__ + IndexType getRows() const { return this->rows; }; + + __cuda_callable__ + IndexType getColumns() const { return this->rows; }; + + __cuda_callable__ + IndexType getStorageSize() const { return 3 * this->size; }; + + __cuda_callable__ + IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const + { + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + TNL_ASSERT_GE( rowIdx, 0, "" ); + TNL_ASSERT_LT( rowIdx, this->rows, "" ); + + if( RowMajorOrder ) + return 3 * rowIdx + localIdx; + else + return localIdx * size + rowIdx; + }; + + protected: + + IndexType rows, columns, size; +}; + } //namespace details + } // namespace Materices +} // namespace TNL diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index 40cecb2bd..962f8c82d 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -587,7 +587,7 @@ void test_SetRow() { 2, 3, 4, 5, 6 } }; auto row = matrix_view.getRow( rowIdx ); for( IndexType i = 0; i < 5; i++ ) - row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + row.setElement( i, values[ rowIdx ][ i ] ); }; TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); @@ -1172,7 +1172,7 @@ void test_AssignmentOperator() TridiagonalHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) - hostMatrix( i, j ) = i + j; + hostMatrix.setElement( i, j, i + j ); Matrix matrix( rows, columns ); matrix.getValues() = 0.0; @@ -1369,7 +1369,7 @@ using MatrixTypes = ::testing::Types TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); -TYPED_TEST( Matrix, getSerializationType ) +TYPED_TEST( MatrixTest, getSerializationType ) { test_GetSerializationType(); } -- GitLab From 57de3baa0541918c960db0c64d99f4738fbfd822 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 9 Jan 2020 22:12:32 +0100 Subject: [PATCH 080/179] Adding mutlidiagonal matrix unit tests. --- src/UnitTests/Matrices/CMakeLists.txt | 8 + .../Matrices/MultidiagonalMatrixTest.cpp | 11 + .../Matrices/MultidiagonalMatrixTest.cu | 11 + .../Matrices/MultidiagonalMatrixTest.h | 1611 +++++++++++++++++ 4 files changed, 1641 insertions(+) create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.cu create mode 100644 src/UnitTests/Matrices/MultidiagonalMatrixTest.h diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 333dee952..287495405 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -13,6 +13,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -39,6 +42,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( 
SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -58,6 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp new file mode 100644 index 000000000..73406d0df --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + MultidiagonalMatrixTest.cpp - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "MultidiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu new file mode 100644 index 000000000..e3dab545c --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + MultidiagonalMatrixTest.cu - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "MultidiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h new file mode 100644 index 000000000..01ae4a518 --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -0,0 +1,1611 @@ +/*************************************************************************** + MultidiagonalMatrixTest.h - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >; +using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >; + +using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >; +using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >; + +static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; + +#ifdef HAVE_GTEST +#include + +#include + +void test_GetSerializationType() +{ + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true 
>::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) ); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); + + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), 
m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + + // Insert values into the rows. + RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix > +void test_GetRowLength() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRowLength( 0 ), 7 ); + EXPECT_EQ( m.getRowLength( 1 ), 7 ); + EXPECT_EQ( m.getRowLength( 2 ), 7 ); + EXPECT_EQ( m.getRowLength( 3 
), 7 ); + EXPECT_EQ( m.getRowLength( 4 ), 7 ); + EXPECT_EQ( m.getRowLength( 5 ), 7 ); + EXPECT_EQ( m.getRowLength( 6 ), 7 ); + EXPECT_EQ( m.getRowLength( 7 ), 7 ); +} + +template< typename Matrix > +void test_GetNumberOfMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 7x6 dense matrix: + * + * / 0 2 3 4 5 6 \ + * | 7 8 9 10 11 12 | + * | 13 14 15 16 17 18 | + * | 19 20 21 22 23 24 | + * | 25 26 27 28 29 30 | + * | 31 32 33 34 35 36 | + * \ 37 38 39 40 41 0 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. + m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. 
+ + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 dense matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.setDimensions( rows, cols ); + + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_SetValue() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 7x6 dense matrix: + * + * / 1 2 3 4 5 6 \ + * | 7 8 9 10 11 12 | + * | 13 14 15 16 17 18 | + * | 19 20 21 22 23 24 | + * | 25 26 27 28 29 30 | + * | 31 32 33 34 35 36 | + * \ 37 38 39 40 41 42 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 7 ); + EXPECT_EQ( m.getElement( 1, 1 ), 8 ); + EXPECT_EQ( m.getElement( 1, 2 ), 9 ); + EXPECT_EQ( m.getElement( 1, 3 ), 10 ); + EXPECT_EQ( m.getElement( 1, 4 ), 11 ); + EXPECT_EQ( m.getElement( 1, 5 ), 12 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 13 ); + EXPECT_EQ( m.getElement( 2, 1 ), 14 ); + EXPECT_EQ( m.getElement( 2, 2 ), 15 ); + EXPECT_EQ( m.getElement( 2, 3 ), 16 ); + EXPECT_EQ( m.getElement( 2, 4 ), 17 ); + EXPECT_EQ( m.getElement( 2, 5 ), 
18 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 19 ); + EXPECT_EQ( m.getElement( 3, 1 ), 20 ); + EXPECT_EQ( m.getElement( 3, 2 ), 21 ); + EXPECT_EQ( m.getElement( 3, 3 ), 22 ); + EXPECT_EQ( m.getElement( 3, 4 ), 23 ); + EXPECT_EQ( m.getElement( 3, 5 ), 24 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 25 ); + EXPECT_EQ( m.getElement( 4, 1 ), 26 ); + EXPECT_EQ( m.getElement( 4, 2 ), 27 ); + EXPECT_EQ( m.getElement( 4, 3 ), 28 ); + EXPECT_EQ( m.getElement( 4, 4 ), 29 ); + EXPECT_EQ( m.getElement( 4, 5 ), 30 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 31 ); + EXPECT_EQ( m.getElement( 5, 1 ), 32 ); + EXPECT_EQ( m.getElement( 5, 2 ), 33 ); + EXPECT_EQ( m.getElement( 5, 3 ), 34 ); + EXPECT_EQ( m.getElement( 5, 4 ), 35 ); + EXPECT_EQ( m.getElement( 5, 5 ), 36 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 37 ); + EXPECT_EQ( m.getElement( 6, 1 ), 38 ); + EXPECT_EQ( m.getElement( 6, 2 ), 39 ); + EXPECT_EQ( m.getElement( 6, 3 ), 40 ); + EXPECT_EQ( m.getElement( 6, 4 ), 41 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); + + // Set the values of all elements to a certain number + m.setValue( 42 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); + EXPECT_EQ( m.getElement( 0, 1 ), 42 ); + EXPECT_EQ( m.getElement( 0, 2 ), 42 ); + EXPECT_EQ( m.getElement( 0, 3 ), 42 ); + EXPECT_EQ( m.getElement( 0, 4 ), 42 ); + EXPECT_EQ( m.getElement( 0, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); + EXPECT_EQ( m.getElement( 1, 1 ), 42 ); + EXPECT_EQ( m.getElement( 1, 2 ), 42 ); + EXPECT_EQ( m.getElement( 1, 3 ), 42 ); + EXPECT_EQ( m.getElement( 1, 4 ), 42 ); + EXPECT_EQ( m.getElement( 1, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 42 ); + EXPECT_EQ( m.getElement( 2, 1 ), 42 ); + EXPECT_EQ( m.getElement( 2, 2 ), 42 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 42 ); + EXPECT_EQ( m.getElement( 2, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 42 ); + EXPECT_EQ( m.getElement( 3, 1 ), 42 ); + EXPECT_EQ( m.getElement( 3, 2 ), 42 ); + EXPECT_EQ( m.getElement( 3, 3 ), 42 ); + EXPECT_EQ( 
m.getElement( 3, 4 ), 42 ); + EXPECT_EQ( m.getElement( 3, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 42 ); + EXPECT_EQ( m.getElement( 4, 1 ), 42 ); + EXPECT_EQ( m.getElement( 4, 2 ), 42 ); + EXPECT_EQ( m.getElement( 4, 3 ), 42 ); + EXPECT_EQ( m.getElement( 4, 4 ), 42 ); + EXPECT_EQ( m.getElement( 4, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 42 ); + EXPECT_EQ( m.getElement( 5, 1 ), 42 ); + EXPECT_EQ( m.getElement( 5, 2 ), 42 ); + EXPECT_EQ( m.getElement( 5, 3 ), 42 ); + EXPECT_EQ( m.getElement( 5, 4 ), 42 ); + EXPECT_EQ( m.getElement( 5, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 42 ); + EXPECT_EQ( m.getElement( 6, 1 ), 42 ); + EXPECT_EQ( m.getElement( 6, 2 ), 42 ); + EXPECT_EQ( m.getElement( 6, 3 ), 42 ); + EXPECT_EQ( m.getElement( 6, 4 ), 42 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); +} + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * \ 21 22 23 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 
14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + 
EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. +/* + * The following setup results in the following 6x5 dense matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.addElement( i, j, newValue++, multiplicator ); + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 12 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); + EXPECT_EQ( m.getElement( 1, 3 ), 27 ); + EXPECT_EQ( m.getElement( 1, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 36 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 45 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); + EXPECT_EQ( m.getElement( 3, 1 ), 51 ); + EXPECT_EQ( m.getElement( 3, 2 ), 54 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); + EXPECT_EQ( m.getElement( 4, 1 ), 66 ); + EXPECT_EQ( m.getElement( 4, 2 ), 69 ); + EXPECT_EQ( m.getElement( 4, 3 ), 72 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); + EXPECT_EQ( m.getElement( 5, 1 ), 81 ); + EXPECT_EQ( 
m.getElement( 5, 2 ), 84 ); + EXPECT_EQ( m.getElement( 5, 3 ), 87 ); + EXPECT_EQ( m.getElement( 5, 4 ), 90 ); +} + +template< typename Matrix > +void test_SetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 3x7 dense matrix: + * + * / 1 2 3 4 5 6 7 \ + * | 8 9 10 11 12 13 14 | + * \ 15 16 17 18 19 20 21 / + */ + const IndexType rows = 3; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 3 ][ 5 ] { + { 11, 11, 11, 11, 11 }, + { 22, 22, 22, 22, 22 }, + { 33, 33, 33, 33, 33 } }; + IndexType columnIndexes[ 3 ][ 5 ] { + { 0, 1, 2, 3, 4 }, + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 5, 6 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + row.setElement( i, values[ rowIdx ][ i ] ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 11 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + EXPECT_EQ( m.getElement( 0, 6 ), 7 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); + EXPECT_EQ( m.getElement( 1, 1 ), 22 ); + EXPECT_EQ( m.getElement( 1, 2 ), 22 ); + EXPECT_EQ( m.getElement( 1, 3 ), 22 ); + EXPECT_EQ( m.getElement( 1, 4 ), 22 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 14 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 2, 1 ), 16 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 33 ); + EXPECT_EQ( m.getElement( 2, 4 ), 33 ); + 
EXPECT_EQ( m.getElement( 2, 5 ), 33 ); + EXPECT_EQ( m.getElement( 2, 6 ), 33 ); +} + +template< typename Matrix > +void test_AddRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements 
with a multiplying factor applied to the old elements. + /* + * Each old element is multiplied by its row index and the per-row value from 'values' below is added, giving this 6x5 matrix: + * + * / 11 11 11 11 0 \ + * | 28 29 30 31 10 | + * | 55 57 59 61 30 | + * | 92 95 98 101 60 | + * | 139 143 147 151 100 | + * \ 196 201 206 211 150 / + */ + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 5 ] { + { 11, 11, 11, 11, 0 }, + { 22, 22, 22, 22, 0 }, + { 33, 33, 33, 33, 0 }, + { 44, 44, 44, 44, 0 }, + { 55, 55, 55, 55, 0 }, + { 66, 66, 66, 66, 0 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 31 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 55 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 2, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 92 ); + EXPECT_EQ( m.getElement( 3, 1 ), 95 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 139 ); + EXPECT_EQ( m.getElement( 4, 1 ), 143 ); + EXPECT_EQ( m.getElement( 4, 2 ), 147 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 100 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 196 ); + EXPECT_EQ( m.getElement( 5, 1 ), 201 ); + EXPECT_EQ( m.getElement( 5, 2 ), 206 ); + EXPECT_EQ( m.getElement( 5, 3 ), 
211 ); + EXPECT_EQ( m.getElement( 5, 4 ), 150 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + VectorType inVector; + inVector.setSize( 4 ); + for( IndexType i = 0; i < inVector.getSize(); i++ ) + inVector.setElement( i, 2 ); + + VectorType outVector; + outVector.setSize( 5 ); + for( IndexType j = 0; j < outVector.getSize(); j++ ) + outVector.setElement( j, 0 ); + + + m.vectorProduct( inVector, outVector); + + EXPECT_EQ( outVector.getElement( 0 ), 20 ); + EXPECT_EQ( outVector.getElement( 1 ), 52 ); + EXPECT_EQ( outVector.getElement( 2 ), 84 ); + EXPECT_EQ( outVector.getElement( 3 ), 116 ); + EXPECT_EQ( outVector.getElement( 4 ), 148 ); +} + +template< typename Matrix > +void test_AddMatrix() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 
| + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + + Matrix m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + m2.setElement( i, j, newValue++ ); + + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + + Matrix mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + 
thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); + + 
EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 60 ); +} + +template< typename Matrix > +void test_GetMatrixProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType leftRows = 5; + const IndexType leftCols = 4; + + Matrix leftMatrix; + leftMatrix.reset(); + leftMatrix.setDimensions( leftRows, leftCols ); + + RealType value = 1; + for( IndexType i = 0; i < leftRows; i++ ) + for( IndexType j = 0; j < leftCols; j++) + leftMatrix.setElement( i, j, value++ ); + +/* + * Sets up the following 4x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * \ 16 17 18 19 20 / + */ + const IndexType rightRows = 4; + const IndexType rightCols = 5; + + Matrix rightMatrix; + rightMatrix.reset(); + rightMatrix.setDimensions( rightRows, rightCols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rightRows; i++ ) + for( IndexType j = 0; j < rightCols; j++) + rightMatrix.setElement( i, j, newValue++ ); + +/* + * Sets up the following 5x5 (leftRows x rightCols) zero-filled result matrix: + * + * / 0 0 0 0 0 \ + * | 0 0 0 0 0 | + * | 0 0 0 0 0 | + * | 0 0 0 0 0 | + * \ 0 0 0 0 0 / + */ + + Matrix mResult; + mResult.reset(); + mResult.setDimensions( leftRows, rightCols ); + mResult.setValue( 0 ); + + RealType leftMatrixMultiplicator = 1; + RealType rightMatrixMultiplicator = 2; +/* + * / 1 2 3 4 \ / 220 240 260 280 300 \ + * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | + * 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 
10 | = | 764 848 932 1016 1100 | + * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | + * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / + */ + + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); + EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); + EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); + EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); + EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 ); + EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 ); +} + +template< typename Matrix > +void test_GetTransposition() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 3x2 dense matrix: + * + * / 1 2 \ + * | 3 4 | + * \ 5 6 / + */ + const IndexType rows = 3; + const IndexType cols = 2; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( 
IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.print( std::cout ); + +/* + * Sets up the following 2x3 dense matrix: + * + * / 0 0 0 \ + * \ 0 0 0 / + */ + Matrix mTransposed; + mTransposed.reset(); + mTransposed.setDimensions( cols, rows ); + + mTransposed.print( std::cout ); + + RealType matrixMultiplicator = 1; + + mTransposed.getTransposition( m, matrixMultiplicator ); + + mTransposed.print( std::cout ); + +/* + * Should result in the following 2x3 dense matrix: + * + * / 1 3 5 \ + * \ 2 4 6 / + */ + + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); + EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); + EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); + + EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); + EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); + EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); +} + + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 4x4 dense matrix: + * + * / 4 1 1 1 \ + * | 1 4 1 1 | + * | 1 1 4 1 | + * \ 1 1 1 4 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0 ); + m.setElement( 0, 2, 1.0 ); + m.setElement( 0, 3, 1.0 ); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + m.setElement( 1, 3, 1.0 ); + + m.setElement( 2, 0, 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 0, 1.0 ); // 3rd row + m.setElement( 3, 1, 1.0 ); + m.setElement( 3, 2, 1.0 ); + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, 
xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 0.3671875 ); +} + +template< typename Matrix > +void test_AssignmentOperator() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( 
IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + + +template< typename Matrix > +void test_SaveAndLoad() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 4x4 dense matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * \ 13 14 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix savedMatrix; + savedMatrix.reset(); + savedMatrix.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + savedMatrix.setElement( i, j, value++ ); + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + loadedMatrix.reset(); + loadedMatrix.setDimensions( rows, cols ); + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( 
savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + m.setElement( i, j, value++ ); + + #include + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + 
+ couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" + "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" + "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" + "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" + "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, long >, 
+ TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); + +TYPED_TEST( MatrixTest, getSerializationType ) +{ + test_GetSerializationType(); +} + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, getRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetRowLength< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setValueTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetValue< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, 
vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addMatrixTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddMatrix< MatrixType >(); +} + +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >(); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +//// test_getType is not general enough yet. DO NOT TEST IT YET. + +//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Host ) +//{ +// host_test_GetType< Multidiagonal_host_float, Multidiagonal_host_int >(); +//} +// +//#ifdef HAVE_CUDA +//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Cuda ) +//{ +// cuda_test_GetType< Multidiagonal_cuda_float, Multidiagonal_cuda_int >(); +//} +//#endif + +/*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::MultidiagonalMatrixProductKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal *, Multidiagonal_host_int *, Multidiagonal_host_int *, const int, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal::RealType &, const TNL::Matrices::Multidiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Multidiagonal_host_int, Matrix2=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1315): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Cuda ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::MultidiagonalMatrixProductKernel(TNL::Matrices::Multidiagonal *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal::RealType &, const TNL::Matrices::Multidiagonal::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct() [with Matrix=Multidiagonal_cuda_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1332): here\n\n"; +} +#endif + +TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Host ) +{ +// test_GetTransposition< Multidiagonal_host_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal *, Multidiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n"; + std::cout << "AND this message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionNonAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal *, Multidiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " 
/home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Cuda ) +{ +// test_GetTransposition< Multidiagonal_cuda_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test throws the following message: \n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! 
\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n"; + std::cout << " what(): CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n"; + std::cout << " Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n"; + std::cout << " [1] 4003 abort (core dumped) ./MultidiagonalMatrixTest-dbg\n"; +} +#endif + +TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Host ) +{ + test_PerformSORIteration< Multidiagonal_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Cuda ) +{ +// test_PerformSORIteration< Multidiagonal_cuda_float >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched, this test throws the following message: \n"; + std::cout << " [1] 6992 segmentation fault (core dumped) ./SparseMatrixTest-dbg\n\n"; + std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; +} +#endif + * */ + +#endif // HAVE_GTEST + +#include "../main.h" -- GitLab From 000546f73ab34bc3705d7e85522c8af3a247afea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 10 Jan 2020 22:39:19 +0100 Subject: [PATCH 081/179] Added a method ArrayView::copy for shallow copy. 
--- src/TNL/Containers/ArrayView.h | 9 +++++++++ src/TNL/Containers/ArrayView.hpp | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index c06ad56dc..b4e063b7e 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -237,6 +237,15 @@ public: typename = std::enable_if_t< std::is_convertible< T, ValueType >::value || IsArrayType< T >::value > > ArrayView& operator=( const T& array ); + /** + * \brief Shallow copy of the array view + * + * \param view Reference to the source array view. + * \return Reference to this array view. + */ + __cuda_callable__ + ArrayView& copy( const ArrayView& view ); + /** * \brief Swaps this array view with another. * diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index c3c39bc10..4ef8ac3f6 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -118,6 +118,19 @@ operator=( const T& data ) return *this; } +template< typename Value, + typename Device, + typename Index > +__cuda_callable__ +ArrayView< Value, Device, Index >& +ArrayView< Value, Device, Index >:: +copy( const ArrayView& view ) +{ + data = view.data; + size = view.size; + return *this; +} + template< typename Value, typename Device, typename Index > -- GitLab From c799fd4ef3af47f174a45cb8a495ab6208a6fe9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 10 Jan 2020 22:40:11 +0100 Subject: [PATCH 082/179] Fixing tridiagonal matrix. 
--- src/TNL/Matrices/Matrix.h | 2 +- src/TNL/Matrices/Matrix.hpp | 2 +- src/TNL/Matrices/MatrixView.h | 13 +- src/TNL/Matrices/MatrixView.hpp | 14 +- src/TNL/Matrices/Tridiagonal.h | 6 +- src/TNL/Matrices/Tridiagonal.hpp | 181 +-- src/TNL/Matrices/TridiagonalMatrixView.h | 11 - src/TNL/Matrices/TridiagonalMatrixView.hpp | 147 +-- .../details/TridiagonalMatrixIndexer.h | 20 +- src/UnitTests/Matrices/DenseMatrixTest.h | 6 +- .../Matrices/MultidiagonalMatrixTest.h | 2 +- .../Matrices/TridiagonalMatrixTest.h | 1105 +++++++++-------- 12 files changed, 743 insertions(+), 766 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index a9b458d7b..7813fa962 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -64,7 +64,7 @@ public: template< typename Matrix_ > void setLike( const Matrix_& matrix ); - IndexType getNumberOfMatrixElements() const; + IndexType getAllocatedElementsCount() const; virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 29226cb00..efd26e1fa 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -91,7 +91,7 @@ template< typename Real, typename Device, typename Index, typename RealAllocator > -Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfMatrixElements() const +Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const { return this->values.getSize(); } diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 76965e511..b8adfd791 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -57,12 +57,10 @@ public: virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; - IndexType getNumberOfMatrixElements() const; + IndexType getAllocatedElementsCount() const; virtual IndexType getNumberOfNonzeroMatrixElements() const; - void reset(); - __cuda_callable__ IndexType getRows() 
const; @@ -91,6 +89,15 @@ public: ValuesView& getValues(); + /** + * \brief Shallow copy of the matrix view. + * + * @param view Reference to the source matrix view. + * @return Reference to this matrix view. + */ + __cuda_callable__ + MatrixView& operator=( const MatrixView& view ); + // TODO: parallelize and optimize for sparse matrices template< typename Matrix > bool operator == ( const Matrix& matrix ) const; diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index bd3d9beae..b2739ae1d 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -64,7 +64,7 @@ void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe template< typename Real, typename Device, typename Index > -Index MatrixView< Real, Device, Index >::getNumberOfMatrixElements() const +Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const { return this->values.getSize(); } @@ -118,15 +118,18 @@ getValues() { return this->values; } - template< typename Real, typename Device, typename Index > -void MatrixView< Real, Device, Index >::reset() +__cuda_callable__ +MatrixView< Real, Device, Index >& +MatrixView< Real, Device, Index >:: +operator=( const MatrixView& view ) { - this->rows = 0; - this->columns = 0; - this->values.reset(); + rows = view.rows; + columns = view.columns; + values.copy( view.values ); + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 51e05c899..d28270156 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -79,12 +79,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); - IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; - IndexType getMaxRowlength() const; - void reset(); template<
typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > @@ -179,6 +175,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > const IndexType localIdx ) const; IndexerType indexer; + + ViewType view; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index a7178f86e..c6d359d3b 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include @@ -105,6 +106,7 @@ setDimensions( const IndexType rows, const IndexType columns ) this->indexer.setDimensions( rows, columns ); this->values.setSize( this->indexer.getStorageSize() ); this->values = 0.0; + this->view = this->getView(); } template< typename Real, @@ -138,20 +140,25 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Index + template< typename Vector > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getRowLength( const IndexType row ) const +getCompressedRowLengths( Vector& rowLengths ) const { - const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - if( row == 0 ) - return 2; - if( row > 0 && row < diagonalLength - 1 ) - return 3; - if( this->getRows() > this->getColumns() ) - return 1; - if( this->getRows() == this->getColumns() ) - return 2; - return 3; + return this->view.getCompressedRowLengths( rowLengths ); + /*rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, 
reduce, keep, 0 );*/ } template< typename Real, @@ -161,9 +168,10 @@ template< typename Real, typename RealAllocator > Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getMaxRowLength() const +getRowLength( const IndexType row ) const { - return 3; + return this->view.getRowLength( row ); + //return this->indexer.getRowSize( row ); } template< typename Real, @@ -171,12 +179,11 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > -void +Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +getMaxRowLength() const { - this->setDimensions( m.getRows(), m.getColumns() ); + return this->view.getMaxRowLength(); } template< typename Real, @@ -184,11 +191,12 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Index + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getNumberOfMatrixElements() const +setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) { - return 3 * min( this->getRows(), this->getColumns() ); + this->setDimensions( m.getRows(), m.getColumns() ); } template< typename Real, @@ -200,23 +208,12 @@ Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getNumberOfNonzeroMatrixElements() const { - const auto values_view = this->values.getConstView(); + return this->view.getNumberOfNonzeroMatrixElements(); + /*const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { return ( values_view[ i ] != 0.0 ); }; - return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, 
fetch, 0 ); -} - -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > -Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getMaxRowlength() const -{ - return 3; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/ } template< typename Real, @@ -272,7 +269,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setValue( const RealType& v ) { - this->values = v; + this->view.setValue( v ); } template< typename Real, @@ -285,7 +282,8 @@ auto Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { - return RowView( this->values.getView(), this->indexer ); + return this->view.getRow( rowIdx ); + //return RowView( this->values.getView(), this->indexer ); } template< typename Real, @@ -298,7 +296,8 @@ auto Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { - return RowView( this->values.getView(), this->indexer ); + return this->view.getRow( rowIdx ); + //return RowView( this->values.getView(), this->indexer ); } template< typename Real, @@ -310,14 +309,19 @@ bool Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { - TNL_ASSERT_GE( row, 0, "" ); + return this->view.setElement( row, column, value ); + /*TNL_ASSERT_GE( row, 0, "" ); TNL_ASSERT_LT( row, this->getRows(), "" ); TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) - throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." 
); + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } this->values.setElement( this->getElementIndex( row, column ), value ); - return true; + return true;*/ } template< typename Real, @@ -332,15 +336,20 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - TNL_ASSERT_GE( row, 0, "" ); + return this->view.addElement( row, column, value, thisElementMultiplicator ); + /*TNL_ASSERT_GE( row, 0, "" ); TNL_ASSERT_LT( row, this->getRows(), "" ); TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) - throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." ); + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; + return true;*/ } template< typename Real, @@ -352,14 +361,15 @@ Real Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { - TNL_ASSERT_GE( row, 0, "" ); + return this->view.getElement( row, column ); + /*TNL_ASSERT_GE( row, 0, "" ); TNL_ASSERT_LT( row, this->getRows(), "" ); TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( column - row ) > 1 ) return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + return this->values.getElement( this->getElementIndex( row, column ) );*/ } template< typename Real, @@ -372,46 +382,40 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, 
const FetchReal& zero ) const { + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); + /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; + const auto indexer = this->indexer; + const auto zero = zero_; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; + Real_ sum( zero ); if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ), - fetch( 0, 1, i_1, values_view[ i_1 ] ) ) ); + reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); + reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + keep( 0, sum ); return; } - if( rowIdx < size || columns > rows ) + if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - - keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), - fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ), - fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) ); + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); + keep( rowIdx, sum ); return; } - if( rows == columns ) + if( indexer.getRows() == indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - keep( rowIdx, reduce( fetch( rowIdx, 
rowIdx - 1, i_0, values_view[ i_0 ] ), - fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) ); + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + keep( rowIdx, sum ); } else { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) ); + keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); } }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); + Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -424,7 +428,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); + this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, @@ -437,7 +441,8 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { - const auto values_view = this->values.getConstView(); + this->view.forRows( first, last, function ); + /*const auto values_view = this->values.getConstView(); const auto indexer_ = this->indexer; const auto rows = this->getRows(); const auto columns = this->getColumns(); @@ -475,7 +480,7 @@ forRows( IndexType first, IndexType last, Function& function ) const function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); } }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); + Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -488,7 +493,8 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { - const auto values_view = 
this->values.getConstView(); + this->view.forRows( first, last, function ); + /*const auto values_view = this->values.getConstView(); const auto indexer_ = this->indexer; const auto rows = this->getRows(); const auto columns = this->getColumns(); @@ -526,7 +532,7 @@ forRows( IndexType first, IndexType last, Function& function ) function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); } }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); + Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -539,7 +545,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) const { - this->forRows( 0, this->getRows(), function ); + this->view.forRows( 0, this->getRows(), function ); } template< typename Real, @@ -552,7 +558,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forAllRows( Function& function ) { - this->forRows( 0, this->getRows(), function ); + this->view.forRows( 0, this->getRows(), function ); } template< typename Real, @@ -566,11 +572,12 @@ typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { - return TridiagonalDeviceDependentCode< Device >:: + return this->view.rowVectorProduct(); + /*return TridiagonalDeviceDependentCode< Device >:: rowVectorProduct( this->rows, this->values, row, - vector ); + vector );*/ } template< typename Real, @@ -584,12 +591,13 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), + this->view.vectorProduct( inVector, outVector ); + /*TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl << "Vector size: " << inVector.getSize() << std::endl ); TNL_ASSERT( this->getRows() == 
outVector.getSize(), std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); + << "Vector size: " << outVector.getSize() << std::endl );*/ //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } @@ -815,12 +823,15 @@ template< typename Real, typename RealAllocator > __cuda_callable__ Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getElementIndex( const IndexType row, const IndexType localIdx ) const +getElementIndex( const IndexType row, const IndexType column ) const { - TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); + IndexType localIdx = column - row; + if( row > 0 ) + localIdx++; + TNL_ASSERT_GE( localIdx, 0, "" ); TNL_ASSERT_LT( localIdx, 3, "" ); + return this->indexer.getGlobalIndex( row, localIdx ); } diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 05f7663c9..78593acf5 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -70,12 +70,8 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > IndexType getMaxRowLength() const; - IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; - IndexType getMaxRowlength() const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; @@ -144,13 +140,6 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; - // copy assignment - TridiagonalMatrixView& operator=( const TridiagonalMatrixView& matrix ); - - // cross-device copy assignment - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - TridiagonalMatrixView& operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& 
matrix ); - void save( File& file ) const; void save( const String& fileName ) const; diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index ef893295e..83ff6035d 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -87,31 +87,36 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -Index + template< typename Vector > +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getRowLength( const IndexType row ) const +getCompressedRowLengths( Vector& rowLengths ) const { - const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - if( row == 0 ) - return 2; - if( row > 0 && row < diagonalLength - 1 ) - return 3; - if( this->getRows() > this->getColumns() ) - return 1; - if( this->getRows() == this->getColumns() ) - return 2; - return 3; + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); } + template< typename Real, typename Device, typename Index, bool RowMajorOrder > Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getMaxRowLength() const +getRowLength( const IndexType row ) const { - return 3; + return this->indexer.getRowSize( row ); } template< typename Real, @@ -120,9 +125,9 @@ template< typename Real, bool RowMajorOrder > Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getNumberOfMatrixElements() const +getMaxRowLength() const { - return 3 * min( this->getRows(), this->getColumns() ); 
+ return 3; } template< typename Real, @@ -140,17 +145,6 @@ getNumberOfNonzeroMatrixElements() const return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getMaxRowlength() const -{ - return 3; -} - template< typename Real, typename Device, typename Index, @@ -228,7 +222,11 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) - throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." ); + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } this->values.setElement( this->getElementIndex( row, column ), value ); return true; } @@ -249,7 +247,11 @@ addElement( const IndexType row, TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); if( abs( row - column ) > 1 ) - throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." 
); + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); return true; @@ -280,45 +282,38 @@ template< typename Real, template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const { + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; + const auto indexer = this->indexer; + const auto zero = zero_; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; + Real_ sum( zero ); if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ), - fetch( 0, 1, i_1, values_view[ i_1 ] ) ) ); + reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); + reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + keep( 0, sum ); return; } - if( rowIdx < size || columns > rows ) + if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - - keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, 
values_view[ i_0 ] ), - fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ), - fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) ); + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); + keep( rowIdx, sum ); return; } - if( rows == columns ) + if( indexer.getRows() == indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ), - fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) ); + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + keep( rowIdx, sum ); } else { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) ); + keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); @@ -613,41 +608,6 @@ performSORIteration( const Vector1& b, } -// copy assignment -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >& -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator=( const TridiagonalMatrixView& matrix ) -{ - this->setLike( matrix ); - this->values = matrix.values; - return *this; -} - -// cross-device copy assignment -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >& 
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) -{ - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, - "unknown device" ); - - this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet."); -} - - template< typename Real, typename Device, typename Index, @@ -690,12 +650,15 @@ template< typename Real, bool RowMajorOrder > __cuda_callable__ Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getElementIndex( const IndexType row, const IndexType localIdx ) const +getElementIndex( const IndexType row, const IndexType column ) const { - TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); + IndexType localIdx = column - row; + if( row > 0 ) + localIdx++; + TNL_ASSERT_GE( localIdx, 0, "" ); TNL_ASSERT_LT( localIdx, 3, "" ); + return this->indexer.getGlobalIndex( row, localIdx ); } diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h index 2f245c38f..d9fdd0c23 100644 --- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -26,21 +26,21 @@ class TridiagonalMatrixIndexer __cuda_callable__ TridiagonalMatrixIndexer() - : rows( 0 ), columns( 0 ), size( 0 ){}; + : rows( 0 ), columns( 0 ), nonEmptyRows( 0 ){}; __cuda_callable__ TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns ) - : rows( rows ), columns( columns ), size( TNL::min( rows, columns ) ) {}; + : rows( rows ), columns( columns ), nonEmptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {}; __cuda_callable__ TridiagonalMatrixIndexer( const 
TridiagonalMatrixIndexer& indexer ) - : rows( indexer.rows ), columns( indexer.columns ), size( indexer.size ) {}; + : rows( indexer.rows ), columns( indexer.columns ), nonEmptyRows( indexer.nonEmptyRows ) {}; void setDimensions( const IndexType& rows, const IndexType& columns ) { this->rows = rows; this->columns = columns; - this->size = min( rows, columns ); + this->nonEmptyRows = min( rows, columns ) + ( rows > columns ); }; __cuda_callable__ @@ -59,13 +59,15 @@ class TridiagonalMatrixIndexer }; __cuda_callable__ - IndexType getRows() const { return this->rows; }; + const IndexType& getRows() const { return this->rows; }; __cuda_callable__ - IndexType getColumns() const { return this->rows; }; + const IndexType& getColumns() const { return this->columns; }; __cuda_callable__ - IndexType getStorageSize() const { return 3 * this->size; }; + const IndexType& getSize() const { return this->nonEmptyRows; }; + __cuda_callable__ + IndexType getStorageSize() const { return 3 * this->nonEmptyRows; }; __cuda_callable__ IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const @@ -78,12 +80,12 @@ class TridiagonalMatrixIndexer if( RowMajorOrder ) return 3 * rowIdx + localIdx; else - return localIdx * size + rowIdx; + return localIdx * nonEmptyRows + rowIdx; }; protected: - IndexType rows, columns, size; + IndexType rows, columns, nonEmptyRows; }; } //namespace details } // namespace Materices diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 183783ea3..0f7158010 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -559,9 +559,9 @@ void test_SetRow() /* * Sets up the following 3x7 dense matrix: * - * / 1 2 3 4 5 6 7 \ - * | 8 9 10 11 12 13 14 | - * \ 15 16 17 18 19 20 21 / + * / 11 11 11 11 11 6 7 \ + * | 22 22 22 22 22 13 14 | + * \ 15 16 33 33 33 33 33 / */ const IndexType rows = 3; const IndexType cols = 7; diff --git 
a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index 01ae4a518..abe6b64c5 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -1,7 +1,7 @@ /*************************************************************************** MultidiagonalMatrixTest.h - description ------------------- - begin : Jan 9, 2020 + begin : Jan 8, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index 962f8c82d..dcd14302a 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -46,42 +46,42 @@ void test_GetSerializationType() template< typename Matrix > void test_SetDimensions() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 9; - const IndexType cols = 8; + const IndexType rows = 9; + const IndexType cols = 8; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m; + m.setDimensions( rows, cols ); - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); } template< typename Matrix1, typename Matrix2 > void test_SetLike() { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - const IndexType rows = 8; - const IndexType cols = 
7; + const IndexType rows = 8; + const IndexType cols = 7; - Matrix1 m1; - m1.reset(); - m1.setDimensions( rows + 1, cols + 2 ); + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); - Matrix2 m2; - m2.reset(); - m2.setDimensions( rows, cols ); + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); - m1.setLike( m2 ); + m1.setLike( m2 ); - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } template< typename Matrix > @@ -94,459 +94,464 @@ void test_GetCompressedRowLengths() const IndexType rows = 10; const IndexType cols = 11; - Matrix m( rows, cols ); + Matrix m( rows, cols ); - // Insert values into the rows. - RealType value = 1; + // Insert values into the rows. + RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); + for( IndexType i = 0; i < 2; i++ ) // 0th row -> 2 elements + m.setElement( 0, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 1st row - m.setElement( 1, i, value++ ); + for( IndexType i = 0; i < 3; i++ ) // 1st row -> 3 elements + m.setElement( 1, i, value++ ); - for( IndexType i = 0; i < 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); + for( IndexType i = 1; i < 3; i++ ) // 2nd row -> 2 elements + m.setElement( 2, i, value++ ); - for( IndexType i = 0; i < 2; i++ ) // 3rd row - m.setElement( 3, i, value++ ); + for( IndexType i = 2; i < 5; i++ ) // 3rd row -> 3 elements + m.setElement( 3, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 4th row - m.setElement( 4, i, value++ ); + for( IndexType i = 3; i < 6; i++ ) // 4th row -> 3 elements + m.setElement( 4, i, value++ ); - for( IndexType i = 0; i < 4; i++ ) // 5th row - m.setElement( 5, i, value++ ); + for( IndexType i = 4; i < 6; i++ ) // 5th row -> 2 elements + m.setElement( 5, i, value++ ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m.setElement( 6, i, value++ ); + for( 
IndexType i = 5; i < 8; i++ ) // 6th row -> 3 elements + m.setElement( 6, i, value++ ); - for( IndexType i = 0; i < 6; i++ ) // 7th row - m.setElement( 7, i, value++ ); + for( IndexType i = 6; i < 8; i++ ) // 7th row -> 2 elements + m.setElement( 7, i, value++ ); - for( IndexType i = 0; i < 7; i++ ) // 8th row - m.setElement( 8, i, value++ ); + for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements + m.setElement( 8, i, value++ ); - for( IndexType i = 0; i < 8; i++ ) // 9th row - m.setElement( 9, i, value++ ); + for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements + m.setElement( 9, i, value++ ); - typename Matrix::CompressedRowLengthsVector rowLengths; + typename Matrix::CompressedRowLengthsVector rowLengths( rows ); rowLengths = 0; m.getCompressedRowLengths( rowLengths ); - typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 }; EXPECT_EQ( rowLengths, correctRowLengths ); } template< typename Matrix > void test_GetRowLength() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 8; - const IndexType cols = 7; + const IndexType rows = 8; + const IndexType cols = 7; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getRowLength( 0 ), 7 ); - EXPECT_EQ( m.getRowLength( 1 ), 7 ); - EXPECT_EQ( m.getRowLength( 2 ), 7 ); - EXPECT_EQ( m.getRowLength( 3 ), 7 ); - EXPECT_EQ( m.getRowLength( 4 ), 7 ); - EXPECT_EQ( m.getRowLength( 5 ), 7 ); - EXPECT_EQ( m.getRowLength( 6 ), 7 ); - EXPECT_EQ( m.getRowLength( 7 ), 7 ); + EXPECT_EQ( m.getRowLength( 0 ), 2 ); + EXPECT_EQ( m.getRowLength( 1 ), 3 ); + EXPECT_EQ( m.getRowLength( 2 
), 3 ); + EXPECT_EQ( m.getRowLength( 3 ), 3 ); + EXPECT_EQ( m.getRowLength( 4 ), 3 ); + EXPECT_EQ( m.getRowLength( 5 ), 3 ); + EXPECT_EQ( m.getRowLength( 6 ), 2 ); + EXPECT_EQ( m.getRowLength( 7 ), 1 ); } template< typename Matrix > -void test_GetNumberOfMatrixElements() +void test_GetAllocatedElementsCount() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 7; - const IndexType cols = 6; + const IndexType rows = 7; + const IndexType cols = 6; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); + EXPECT_EQ( m.getAllocatedElementsCount(), 21 ); } template< typename Matrix > void test_GetNumberOfNonzeroMatrixElements() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 7x6 dense matrix: - * - * / 0 2 3 4 5 6 \ - * | 7 8 9 10 11 12 | - * | 13 14 15 16 17 18 | - * | 19 20 21 22 23 24 | - * | 25 26 27 28 29 30 | - * | 31 32 33 34 35 36 | - * \ 37 38 39 40 41 0 / - */ - const IndexType rows = 7; - const IndexType cols = 6; + /* + * Sets up the following 7x6 dense matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + const IndexType cols = 6; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - 
m.setElement( i, j, value++ ); + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); - m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. - m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. + m.setElement( 5, 5, 0); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); } template< typename Matrix > void test_Reset() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 0 0 0 0 \ - * | 0 0 0 0 | - * | 0 0 0 0 | - * | 0 0 0 0 | - * \ 0 0 0 0 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + /* + * Sets up the following 5x4 dense matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - m.reset(); + m.reset(); - EXPECT_EQ( m.getRows(), 0 ); - EXPECT_EQ( m.getColumns(), 0 ); + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); } template< typename Matrix > void test_SetValue() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 7x6 dense matrix: - * - * / 1 2 3 4 5 6 \ - * | 7 8 9 10 11 12 | - * | 13 14 15 16 17 18 | - * | 19 20 21 22 23 24 | - * | 25 26 27 28 29 30 | - * | 31 32 33 34 35 36 | - * \ 37 38 39 40 41 42 / - */ - const IndexType rows = 7; - const IndexType cols = 6; + using RealType = typename Matrix::RealType; + using DeviceType = typename 
Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 7x6 dense matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + const IndexType cols = 6; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); + + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 7 ); - EXPECT_EQ( m.getElement( 1, 1 ), 8 ); - EXPECT_EQ( m.getElement( 1, 2 ), 9 ); - EXPECT_EQ( m.getElement( 1, 3 ), 10 ); - EXPECT_EQ( m.getElement( 1, 4 ), 11 ); - EXPECT_EQ( m.getElement( 1, 5 ), 12 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 13 ); - EXPECT_EQ( m.getElement( 2, 1 ), 14 ); - EXPECT_EQ( m.getElement( 2, 2 ), 15 ); - EXPECT_EQ( m.getElement( 2, 3 ), 16 ); - EXPECT_EQ( m.getElement( 2, 4 ), 17 ); - EXPECT_EQ( m.getElement( 2, 5 ), 18 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 19 ); - EXPECT_EQ( m.getElement( 3, 1 ), 20 ); - EXPECT_EQ( m.getElement( 3, 2 ), 21 ); - EXPECT_EQ( m.getElement( 3, 3 ), 22 ); - EXPECT_EQ( m.getElement( 3, 4 ), 23 ); - EXPECT_EQ( m.getElement( 3, 5 ), 24 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 25 ); - EXPECT_EQ( m.getElement( 4, 1 ), 26 ); - EXPECT_EQ( m.getElement( 4, 2 ), 27 ); - EXPECT_EQ( m.getElement( 4, 3 ), 28 ); - EXPECT_EQ( m.getElement( 4, 4 ), 29 ); - EXPECT_EQ( m.getElement( 4, 5 ), 30 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 31 ); - 
EXPECT_EQ( m.getElement( 5, 1 ), 32 ); - EXPECT_EQ( m.getElement( 5, 2 ), 33 ); - EXPECT_EQ( m.getElement( 5, 3 ), 34 ); - EXPECT_EQ( m.getElement( 5, 4 ), 35 ); - EXPECT_EQ( m.getElement( 5, 5 ), 36 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 37 ); - EXPECT_EQ( m.getElement( 6, 1 ), 38 ); - EXPECT_EQ( m.getElement( 6, 2 ), 39 ); - EXPECT_EQ( m.getElement( 6, 3 ), 40 ); - EXPECT_EQ( m.getElement( 6, 4 ), 41 ); - EXPECT_EQ( m.getElement( 6, 5 ), 42 ); - - // Set the values of all elements to a certain number - m.setValue( 42 ); - - EXPECT_EQ( m.getElement( 0, 0 ), 42 ); - EXPECT_EQ( m.getElement( 0, 1 ), 42 ); - EXPECT_EQ( m.getElement( 0, 2 ), 42 ); - EXPECT_EQ( m.getElement( 0, 3 ), 42 ); - EXPECT_EQ( m.getElement( 0, 4 ), 42 ); - EXPECT_EQ( m.getElement( 0, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 42 ); - EXPECT_EQ( m.getElement( 1, 1 ), 42 ); - EXPECT_EQ( m.getElement( 1, 2 ), 42 ); - EXPECT_EQ( m.getElement( 1, 3 ), 42 ); - EXPECT_EQ( m.getElement( 1, 4 ), 42 ); - EXPECT_EQ( m.getElement( 1, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 42 ); - EXPECT_EQ( m.getElement( 2, 1 ), 42 ); - EXPECT_EQ( m.getElement( 2, 2 ), 42 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 42 ); - EXPECT_EQ( m.getElement( 2, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 42 ); - EXPECT_EQ( m.getElement( 3, 1 ), 42 ); - EXPECT_EQ( m.getElement( 3, 2 ), 42 ); - EXPECT_EQ( m.getElement( 3, 3 ), 42 ); - EXPECT_EQ( m.getElement( 3, 4 ), 42 ); - EXPECT_EQ( m.getElement( 3, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 42 ); - EXPECT_EQ( m.getElement( 4, 1 ), 42 ); - EXPECT_EQ( m.getElement( 4, 2 ), 42 ); - EXPECT_EQ( m.getElement( 4, 3 ), 42 ); - EXPECT_EQ( m.getElement( 4, 4 ), 42 ); - EXPECT_EQ( m.getElement( 4, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 42 ); - EXPECT_EQ( m.getElement( 5, 1 ), 42 ); - EXPECT_EQ( m.getElement( 5, 2 ), 42 ); - EXPECT_EQ( m.getElement( 5, 3 ), 42 ); - EXPECT_EQ( m.getElement( 5, 4 ), 42 ); - EXPECT_EQ( 
m.getElement( 5, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 42 ); - EXPECT_EQ( m.getElement( 6, 1 ), 42 ); - EXPECT_EQ( m.getElement( 6, 2 ), 42 ); - EXPECT_EQ( m.getElement( 6, 3 ), 42 ); - EXPECT_EQ( m.getElement( 6, 4 ), 42 ); - EXPECT_EQ( m.getElement( 6, 5 ), 42 ); + m.setElement( 5, 5, 0); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 5 ); + EXPECT_EQ( m.getElement( 2, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 8 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 10 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 11 ); + EXPECT_EQ( m.getElement( 4, 4 ), 12 ); + EXPECT_EQ( m.getElement( 4, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 14 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 16 
); + + // Set the values of all elements to a certain number + m.setValue( 42 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); + EXPECT_EQ( m.getElement( 0, 1 ), 42 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); + EXPECT_EQ( m.getElement( 1, 1 ), 42 ); + EXPECT_EQ( m.getElement( 1, 2 ), 42 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 42 ); + EXPECT_EQ( m.getElement( 2, 2 ), 42 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 42 ); + EXPECT_EQ( m.getElement( 3, 3 ), 42 ); + EXPECT_EQ( m.getElement( 3, 4 ), 42 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 42 ); + EXPECT_EQ( m.getElement( 4, 4 ), 42 ); + EXPECT_EQ( m.getElement( 4, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 42 ); + EXPECT_EQ( m.getElement( 5, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); } template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * 
Sets up the following 5x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * \ 21 22 23 24 25 / - */ - const IndexType rows = 5; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x5 dense matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * \ 0 0 0 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) > 1 ) + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + else m.setElement( i, j, value++ ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( 
m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); } template< typename Matrix > void test_AddElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); RealType value = 1; for( IndexType i = 0; 
i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. 
-/* - * The following setup results in the following 6x5 dense matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - RealType newValue = 1; - RealType multiplicator = 2; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 dense matrix: + * + * / 1 2 0 0 0 \ / 1 2 0 0 0 \ / 3 6 0 0 0 \ + * | 6 7 8 0 0 | | 3 4 5 0 0 | | 15 18 21 0 0 | + * 2 * | 0 12 13 14 0 | + | 0 6 7 8 0 | = | 0 30 33 36 0 | + * | 0 0 18 19 20 | | 0 0 9 10 11 | | 0 0 45 48 51 | + * | 0 0 0 24 25 | | 0 0 0 12 13 | | 0 0 0 60 63 | + * \ 0 0 0 0 30 / \ 0 0 0 0 14 / \ 0 0 0 0 74 / + */ + + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( abs( i - j ) <= 1 ) m.addElement( i, j, newValue++, multiplicator ); - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 12 ); - EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 18 ); - EXPECT_EQ( m.getElement( 1, 1 ), 21 ); - EXPECT_EQ( m.getElement( 1, 2 ), 24 ); - EXPECT_EQ( m.getElement( 1, 3 ), 27 ); - EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 33 ); - EXPECT_EQ( m.getElement( 2, 1 ), 36 ); - EXPECT_EQ( m.getElement( 2, 2 ), 39 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 48 ); - EXPECT_EQ( m.getElement( 3, 1 ), 51 ); - EXPECT_EQ( m.getElement( 3, 2 ), 54 ); - EXPECT_EQ( m.getElement( 3, 3 ), 57 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 63 ); - EXPECT_EQ( m.getElement( 4, 1 ), 66 ); - EXPECT_EQ( m.getElement( 4, 2 ), 69 ); - EXPECT_EQ( m.getElement( 4, 3 ), 72 ); - EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 78 ); - EXPECT_EQ( m.getElement( 5, 1 ), 81 ); - EXPECT_EQ( m.getElement( 5, 2 ), 84 ); - EXPECT_EQ( m.getElement( 5, 3 ), 87 ); - EXPECT_EQ( m.getElement( 5, 4 ), 90 ); + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); 
+ EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 15 ); + EXPECT_EQ( m.getElement( 1, 1 ), 18 ); + EXPECT_EQ( m.getElement( 1, 2 ), 21 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 30 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 36 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 45 ); + EXPECT_EQ( m.getElement( 3, 3 ), 48 ); + EXPECT_EQ( m.getElement( 3, 4 ), 51 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 60 ); + EXPECT_EQ( m.getElement( 4, 4 ), 63 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 74 ); } template< typename Matrix > @@ -559,61 +564,54 @@ void test_SetRow() /* * Sets up the following 3x7 dense matrix: * - * / 1 2 3 4 5 6 7 \ - * | 8 9 10 11 12 13 14 | - * \ 15 16 17 18 19 20 21 / + * / 1 2 0 0 0 0 0 \ + * | 8 9 10 0 0 0 0 | + * \ 0 16 17 18 0 0 0 / */ const IndexType rows = 3; const IndexType cols = 7; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 3 ][ 5 ] { - { 11, 11, 11, 11, 11 }, - { 22, 22, 22, 22, 22 }, - { 33, 33, 33, 33, 33 } }; - IndexType columnIndexes[ 3 ][ 5 ] { - { 0, 1, 2, 3, 4 }, - { 0, 1, 2, 3, 4 }, - { 2, 3, 4, 5, 6 } }; + RealType values[ 3 ][ 3 ] { + { 1, 2, 0 }, + { 8, 9, 10 }, + { 16, 17, 18 } }; auto 
row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 5; i++ ) - row.setElement( i, values[ rowIdx ][ i ] ); + for( IndexType i = 0; i < 3; i++ ) + { + if( rowIdx == 0 && i > 1 ) + break; + row.setElement( i, values[ rowIdx ][ i ] ); + } }; TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 11 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - EXPECT_EQ( m.getElement( 0, 6 ), 7 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 22 ); - EXPECT_EQ( m.getElement( 1, 1 ), 22 ); - EXPECT_EQ( m.getElement( 1, 2 ), 22 ); - EXPECT_EQ( m.getElement( 1, 3 ), 22 ); - EXPECT_EQ( m.getElement( 1, 4 ), 22 ); - EXPECT_EQ( m.getElement( 1, 5 ), 13 ); - EXPECT_EQ( m.getElement( 1, 6 ), 14 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 8 ); + EXPECT_EQ( m.getElement( 1, 1 ), 9 ); + EXPECT_EQ( m.getElement( 1, 2 ), 10 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 16 ); - EXPECT_EQ( m.getElement( 2, 2 ), 33 ); - EXPECT_EQ( m.getElement( 2, 3 ), 33 ); - EXPECT_EQ( m.getElement( 2, 4 ), 33 ); - EXPECT_EQ( m.getElement( 2, 5 ), 33 ); - EXPECT_EQ( m.getElement( 2, 6 ), 33 ); + EXPECT_EQ( m.getElement( 2, 2 ), 17 ); + EXPECT_EQ( m.getElement( 2, 3 ), 18 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); } template< 
typename Matrix > @@ -625,12 +623,12 @@ void test_AddRow() /* * Sets up the following 6x5 dense matrix: * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / */ const IndexType rows = 6; @@ -641,68 +639,72 @@ void test_AddRow() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 
), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); EXPECT_EQ( m.getElement( 4, 3 ), 24 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); EXPECT_EQ( m.getElement( 5, 4 ), 30 ); // Add new elements to the old elements with a multiplying factor applied to the old elements. /* * The following setup results in the following 6x5 sparse matrix: * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / + * / 0 0 0 0 0 0 \ / 1 2 0 0 0 \ / 11 11 0 0 0 \ / 11 11 0 0 0 \ + * | 0 1 0 0 0 0 | | 6 7 8 0 0 | | 22 22 22 0 0 | | 28 29 30 0 0 | + * | 0 0 2 0 0 0 | * | 0 12 13 14 0 | + | 0 33 33 33 0 | = | 0 57 59 61 0 | + * | 0 0 0 3 0 0 | | 0 0 18 19 20 | | 0 0 44 44 44 | | 0 0 98 101 104 | + * | 0 0 0 0 4 0 | | 0 0 0 24 25 | | 0 0 0 55 55 | | 0 0 0 151 155 | + * \ 0 0 0 0 0 5 / \ 0 0 0 0 30 / \ 0 0 0 0 66 / \ 0 0 0 0 216 / */ auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 6 ][ 5 ] { - { 11, 11, 11, 11, 0 }, - { 22, 22, 22, 22, 0 }, - { 33, 33, 33, 33, 0 }, - { 44, 44, 44, 44, 0 }, - { 55, 55, 55, 55, 0 }, - { 66, 66, 66, 66, 0 } }; + RealType values[ 6 ][ 3 ] { + { 11, 11, 0 }, + { 22, 22, 22 }, + { 33, 33, 33 }, + { 44, 44, 44 }, + { 55, 55, 55 }, + { 66, 66, 66 } }; auto row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 5; i++ ) + for( IndexType i = 0; i < 3; i++ ) { RealType& val = row.getValue( i ); val = rowIdx * val + values[ rowIdx ][ i ]; @@ -711,90 +713,86 @@ void test_AddRow() TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 
), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 28 ); - EXPECT_EQ( m.getElement( 1, 1 ), 29 ); - EXPECT_EQ( m.getElement( 1, 2 ), 30 ); - EXPECT_EQ( m.getElement( 1, 3 ), 31 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 55 ); - EXPECT_EQ( m.getElement( 2, 1 ), 57 ); - EXPECT_EQ( m.getElement( 2, 2 ), 59 ); - EXPECT_EQ( m.getElement( 2, 3 ), 61 ); - EXPECT_EQ( m.getElement( 2, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 92 ); - EXPECT_EQ( m.getElement( 3, 1 ), 95 ); - EXPECT_EQ( m.getElement( 3, 2 ), 98 ); - EXPECT_EQ( m.getElement( 3, 3 ), 101 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 139 ); - EXPECT_EQ( m.getElement( 4, 1 ), 143 ); - EXPECT_EQ( m.getElement( 4, 2 ), 147 ); - EXPECT_EQ( m.getElement( 4, 3 ), 151 ); - EXPECT_EQ( m.getElement( 4, 4 ), 100 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 196 ); - EXPECT_EQ( m.getElement( 5, 1 ), 201 ); - EXPECT_EQ( m.getElement( 5, 2 ), 206 ); - EXPECT_EQ( m.getElement( 5, 3 ), 211 ); - EXPECT_EQ( m.getElement( 5, 4 ), 150 ); + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 104 ); + + 
EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 155 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 216 ); } template< typename Matrix > void test_VectorProduct() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 tridiagonal matrix (entries keep their dense row-major numbering 1..20; only those with |i - j| <= 1 are stored): + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++, value++ ) /* value advances for every (i,j), stored or not, so the band matches the matrix drawn above and the expected products 6, 36, 66, 62, 40 */ + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); - VectorType inVector; - inVector.setSize( 4 ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) - inVector.setElement( i, 2 ); + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - VectorType outVector; - outVector.setSize( 5 ); - for( IndexType j = 0; j < outVector.getSize(); j++ ) - outVector.setElement( j, 0 ); + VectorType inVector( 4 ); + 
inVector = 2; + VectorType outVector( 5 ); + outVector = 0; - m.vectorProduct( inVector, outVector); + m.vectorProduct( inVector, outVector); - EXPECT_EQ( outVector.getElement( 0 ), 20 ); - EXPECT_EQ( outVector.getElement( 1 ), 52 ); - EXPECT_EQ( outVector.getElement( 2 ), 84 ); - EXPECT_EQ( outVector.getElement( 3 ), 116 ); - EXPECT_EQ( outVector.getElement( 4 ), 148 ); + std::cerr << outVector << std::endl; + EXPECT_EQ( outVector.getElement( 0 ), 6 ); + EXPECT_EQ( outVector.getElement( 1 ), 36 ); + EXPECT_EQ( outVector.getElement( 2 ), 66 ); + EXPECT_EQ( outVector.getElement( 3 ), 62 ); + EXPECT_EQ( outVector.getElement( 4 ), 40 ); } template< typename Matrix > @@ -1388,6 +1386,13 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } +TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetCompressedRowLengths< MatrixType >(); +} + TYPED_TEST( MatrixTest, getRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1395,11 +1400,11 @@ TYPED_TEST( MatrixTest, getRowLengthTest ) test_GetRowLength< MatrixType >(); } -TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) +TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNumberOfMatrixElements< MatrixType >(); + test_GetAllocatedElementsCount< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) -- GitLab From 7eb350bf9e4735b0c2e4653f91963360fbe5e97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 13:46:45 +0100 Subject: [PATCH 083/179] Fixing Tridiagonal matrix unit tests. 
--- src/TNL/Matrices/Tridiagonal.h | 10 +- src/TNL/Matrices/Tridiagonal.hpp | 251 +++------ src/TNL/Matrices/TridiagonalMatrixView.h | 6 + src/TNL/Matrices/TridiagonalMatrixView.hpp | 183 ++++--- .../details/TridiagonalMatrixIndexer.h | 2 +- src/UnitTests/Matrices/CMakeLists.txt | 12 +- .../Matrices/TridiagonalMatrixTest.h | 478 +++++++++--------- 7 files changed, 430 insertions(+), 512 deletions(-) diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index d28270156..e7e3ab6b2 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -50,13 +50,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > typename _Index = Index > using Self = Tridiagonal< _Real, _Device, _Index >; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + Tridiagonal(); Tridiagonal( const IndexType rows, const IndexType columns ); - ViewType getView(); + ViewType getView() const; // TODO: remove const - ConstViewType getConstView() const; + //ConstViewType getConstView() const; static String getSerializationType(); @@ -168,6 +170,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void print( std::ostream& str ) const; + const IndexerType& getIndexer() const; + + IndexerType& getIndexer(); + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index c6d359d3b..6c09238ff 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -49,12 +49,13 @@ template< typename Real, typename RealAllocator > auto Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -getView() -> ViewType +getView() const -> ViewType { - return ViewType( this->values.getView(), indexer ); + // TODO: fix when getConstView works + return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer ); } -template< typename Real, +/*template< typename Real, typename Device, typename Index, bool 
RowMajorOrder, @@ -64,7 +65,7 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), indexer ); -} +}*/ template< typename Real, typename Device, @@ -146,19 +147,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { return this->view.getCompressedRowLengths( rowLengths ); - /*rowLengths.setSize( this->getRows() ); - rowLengths = 0; - auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { - return ( value != 0.0 ); - }; - auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; - }; - this->allRowsReduction( fetch, reduce, keep, 0 );*/ } template< typename Real, @@ -171,7 +159,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRowLength( const IndexType row ) const { return this->view.getRowLength( row ); - //return this->indexer.getRowSize( row ); } template< typename Real, @@ -209,11 +196,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getNumberOfNonzeroMatrixElements() const { return this->view.getNumberOfNonzeroMatrixElements(); - /*const auto values_view = this->values.getConstView(); - auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { - return ( values_view[ i ] != 0.0 ); - }; - return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/ } template< typename Real, @@ -283,7 +265,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); - //return RowView( this->values.getView(), this->indexer ); } 
template< typename Real, @@ -297,7 +278,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); - //return RowView( this->values.getView(), this->indexer ); } template< typename Real, @@ -310,18 +290,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { return this->view.setElement( row, column, value ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) - { - std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; - throw std::logic_error( msg.str() ); - } - this->values.setElement( this->getElementIndex( row, column ), value ); - return true;*/ } template< typename Real, @@ -337,19 +305,6 @@ addElement( const IndexType row, const RealType& thisElementMultiplicator ) { return this->view.addElement( row, column, value, thisElementMultiplicator ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) - { - std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; - throw std::logic_error( msg.str() ); - } - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true;*/ } template< typename Real, @@ -362,14 +317,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->view.getElement( row, column ); - /*TNL_ASSERT_GE( row, 0, "" ); - TNL_ASSERT_LT( row, 
this->getRows(), "" ); - TNL_ASSERT_GE( column, 0, "" ); - TNL_ASSERT_LT( column, this->getColumns(), "" ); - - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) );*/ } template< typename Real, @@ -383,39 +330,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); - /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); - const auto values_view = this->values.getConstView(); - const auto indexer = this->indexer; - const auto zero = zero_; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - Real_ sum( zero ); - if( rowIdx == 0 ) - { - reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); - reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); - keep( 0, sum ); - return; - } - if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) - { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); - keep( rowIdx, sum ); - return; - } - if( indexer.getRows() == indexer.getColumns() ) - { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - keep( rowIdx, sum ); - } - else - { - keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -442,45 +356,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, 
RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); - /*const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; - if( rowIdx == 0 ) - { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; - } - if( rows == columns ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - } - else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -494,45 +369,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, last, function ); - /*const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; - auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - 
//bool compute; - if( rowIdx == 0 ) - { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; - } - if( rows == columns ) - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - } - else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } - }; - Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/ } template< typename Real, @@ -573,11 +409,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct(); - /*return TridiagonalDeviceDependentCode< Device >:: - rowVectorProduct( this->rows, - this->values, - row, - vector );*/ } template< typename Real, @@ -592,14 +423,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { this->view.vectorProduct( inVector, outVector ); - /*TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << 
outVector.getSize() << std::endl );*/ - - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, @@ -614,14 +437,7 @@ addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix columns: " << this->getColumns() << std::endl - << "This matrix rows: " << this->getRows() << std::endl ); - - if( thisMatrixMultiplicator == 1.0 ) - this->values += matrixMultiplicator * matrix.values; - else - this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; + this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator ); } #ifdef HAVE_CUDA @@ -753,11 +569,31 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca "unknown device" ); this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet."); + if( RowMajorOrder == RowMajorOrder_ ) + this->values = matrix.getValues(); + else + { + if( std::is_same< Device, Device_ >::value ) + { + const auto matrix_view = matrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + else + { + Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; + auxMatrix = matrix; + const auto matrix_view = auxMatrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + } } - template< typename Real, typename Device, 
typename Index, @@ -777,6 +613,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil { Matrix< Real, Device, Index >::load( file ); this->indexer.setDimensions( this->getRows(), this->getColumns() ); + this->view = this->getView(); } template< typename Real, @@ -804,7 +641,9 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -816,6 +655,30 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( st } } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 78593acf5..290062793 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -146,6 +146,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > void print( std::ostream& str ) const; + __cuda_callable__ + const IndexerType& getIndexer() const; + + __cuda_callable__ + IndexerType& getIndexer(); + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 83ff6035d..4d4950c4e 100644 --- 
a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -297,7 +297,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke keep( 0, sum ); return; } - if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() ) + if( rowIdx + 1 < indexer.getColumns() ) { reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); @@ -305,7 +305,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke keep( rowIdx, sum ); return; } - if( indexer.getRows() == indexer.getColumns() ) + if( rowIdx < indexer.getColumns() ) { reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); @@ -313,7 +313,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke } else { - keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); @@ -328,7 +328,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); + this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero ); } template< typename Real, @@ -341,42 +341,26 @@ TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = 
this->getColumns(); - const auto size = this->size; + const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); } - if( rows == columns ) + else if( rowIdx < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); } else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -390,43
+374,27 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) { - const auto values_view = this->values.getConstView(); - const auto indexer_ = this->indexer; - const auto rows = this->getRows(); - const auto columns = this->getColumns(); - const auto size = this->size; + auto values_view = this->values.getView(); + const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - //bool compute; if( rowIdx == 0 ) { - IndexType i_0 = indexer.getGlobalIndex( 0, 0 ); - IndexType i_1 = indexer.getGlobalIndex( 0, 1 ); - function( 0, 1, rowIdx, values_view[ i_0 ] ); - function( 0, 2, rowIdx + 1, values_view[ i_1 ] ); - return; - } - if( rowIdx < size || columns > rows ) + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] ); - return; + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); } - if( rows == columns ) + else if( rowIdx < indexer.getColumns() ) { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 ); - function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] ); - function( rowIdx, 1, rowIdx, values_view[ i_1 ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, 
values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); } else - { - IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 ); - function( rowIdx, 0, rowIdx, values_view[ i_0 ] ); - } + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -440,7 +408,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) const { - this->forRows( 0, this->getRows(), function ); + this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); } template< typename Real, @@ -452,7 +420,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) { - this->forRows( 0, this->getRows(), function ); + this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); } template< typename Real, @@ -477,14 +445,22 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector."
); - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { + return value * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, @@ -498,18 +474,41 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ > const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix columns: " << this->getColumns() << std::endl - << "This matrix rows: " << this->getRows() << std::endl ); + TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); + TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." 
); - if( thisMatrixMultiplicator == 1.0 ) - this->values += matrixMultiplicator * matrix.values; + if( RowMajorOrder == RowMajorOrder_ ) + { + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.getValues(); + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues(); + } else - this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; + { + const auto matrix_view = matrix; + const auto matrixMult = matrixMultiplicator; + const auto thisMult = thisMatrixMultiplicator; + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + if( thisMult == 0.0 ) + this->forAllRows( add0 ); + else if( thisMult == 1.0 ) + this->forAllRows( add1 ); + else + this->forAllRows( addGen ); + } } #ifdef HAVE_CUDA -template< typename Real, +/*template< typename Real, typename Real2, typename Index, typename Index2 > @@ -533,7 +532,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De rowIdx, matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); } -} +}*/ #endif template< typename Real, @@ -563,7 +562,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef 
HAVE_CUDA - Tridiagonal* kernel_this = Cuda::passToDevice( *this ); + /*Tridiagonal* kernel_this = Cuda::passToDevice( *this ); typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); @@ -581,7 +580,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, } Cuda::freeFromDevice( kernel_this ); Cuda::freeFromDevice( kernel_inMatrix ); - TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE;*/ #endif } } @@ -644,6 +643,30 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os } } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h index d9fdd0c23..6d3377b4f 100644 --- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -65,7 +65,7 @@ class TridiagonalMatrixIndexer const IndexType& getColumns() const { return this->columns; }; __cuda_callable__ - const IndexType& getSize() const { return this->nonEmptyRows; }; + const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; }; __cuda_callable__ IndexType getStorageSize() const { return 3 * this->nonEmptyRows; }; diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 287495405..4b95380c4 100644 --- 
a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -13,8 +13,8 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) +# CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -42,9 +42,9 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) +# ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) +# TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) @@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest 
${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index dcd14302a..2c476670b 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -8,6 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ +#include #include #include #include @@ -774,8 +775,11 @@ void test_VectorProduct() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) + { if( abs( i - j ) <= 1 ) - m.setElement( i, j, value++ ); + m.setElement( i, j, value ); + value++; + } using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; @@ -787,7 +791,6 @@ void test_VectorProduct() m.vectorProduct( inVector, outVector); - std::cerr << outVector << std::endl; EXPECT_EQ( outVector.getElement( 0 ), 6 ); EXPECT_EQ( outVector.getElement( 1 ), 36 ); EXPECT_EQ( outVector.getElement( 2 ), 66 ); @@ -795,122 +798,123 @@ void test_VectorProduct() EXPECT_EQ( outVector.getElement( 4 ), 40 ); } -template< typename Matrix > +template< typename Matrix1, typename Matrix2 = Matrix1 > void test_AddMatrix() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const 
IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix1 m( rows, cols ); -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - Matrix m2; - m2.reset(); - m2.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 dense matrix: + * + * / 1 2 0 0 \ + * | 3 4 5 0 | + * | 0 6 7 8 | + * | 0 0 9 10 | + * \ 0 0 0 11 / + */ + Matrix2 m2( rows, cols ); - RealType newValue = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + if( abs( i - j ) <= 1 ) m2.setElement( i, j, newValue++ ); - /* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ + /* + * Compute the following 5x4 dense matrix: + * + * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ + * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | + * | 0 10 11 12 | + 2 * | 0 6 7 8 | = | 0 22 25 28 | + * | 0 0 15 16 | | 0 0 9 10 | | 0 0 33 36 | + * \ 0 0 0 20 / \ 0 0 0 11 / \ 0 0 0 42 / + */ - Matrix mResult; - mResult.reset(); - mResult.setDimensions( rows, cols ); - - mResult = m; - - RealType matrixMultiplicator = 2; 
- RealType thisMatrixMultiplicator = 1; - - mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); - EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); - EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); - EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); - EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); - EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); - EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); - EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); - EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); - EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); - EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + 
thisMatrixMultiplicator * m.getElement( 3, 1 ) ); - EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); - EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); - EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); - EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); - EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); - EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); - EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); - EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); - EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); - EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); - EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); - EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); - EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); - EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); - EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); - EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); - EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); - EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); - EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); - EXPECT_EQ( mResult.getElement( 4, 3 ), 60 ); + Matrix1 mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + 
mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + 
EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 11 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 14 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 17 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 22 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 25 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 28 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 42 ); } template< typename Matrix > @@ -1162,43 +1166,44 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + constexpr 
bool rowMajorOrder = Matrix::getRowMajorOrder(); - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - hostMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + hostMatrix.setElement( i, j, i + j ); Matrix matrix( rows, columns ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) - { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else - EXPECT_EQ( matrix.getElement( i, j ), i + j ); - } + if( abs( i - j ) <= 1 ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); #ifdef HAVE_CUDA TridiagonalCuda cudaMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - cudaMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; matrix = cudaMatrix; - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j < rows; j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else + if( abs( i - j ) <= 1 ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } #endif } @@ 
-1207,123 +1212,125 @@ void test_AssignmentOperator() template< typename Matrix > void test_SaveAndLoad() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 4x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * \ 13 14 15 16 / - */ - const IndexType rows = 4; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix savedMatrix; - savedMatrix.reset(); - savedMatrix.setDimensions( rows, cols ); + /* + * Sets up the following 4x4 dense matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * \ 0 0 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - savedMatrix.setElement( i, j, value++ ); - - ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - - Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( rows, cols ); - - ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 
loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); + Matrix savedMatrix( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + savedMatrix.setElement( i, j, value ); + value++; + } + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 
loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); } template< typename Matrix > void test_Print() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename 
Matrix::IndexType; -/* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - #include - std::stringstream printed; - std::stringstream couted; + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + std::stringstream printed; + std::stringstream couted; - m.print( std::cout ); //all the std::cout goes to ss + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); - std::cout.rdbuf(old_buf); //reset + m.print( std::cout ); //all the std::cout goes to ss - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" - "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" - "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" - "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" - "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" + "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" + "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" + "Row: 3 -> Col:2->15\t Col:3->16\t\n" + "Row: 4 -> Col:3->20\t\n"; - 
EXPECT_EQ( printed.str(), couted.str() ); + EXPECT_EQ( printed.str(), couted.str() ); } // test fixture for typed tests @@ -1470,6 +1477,19 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) +{ + using MatrixType = typename TestFixture::MatrixType; + + using RealType = typename MatrixType::RealType; + using DeviceType = typename MatrixType::DeviceType; + using IndexType = typename MatrixType::IndexType; + using RealAllocatorType = typename MatrixType::RealAllocatorType; + using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >; + + test_AddMatrix< MatrixType, MatrixType2 >(); +} + TYPED_TEST( MatrixTest, assignmentOperatorTest ) { using MatrixType = typename TestFixture::MatrixType; -- GitLab From 97de6cd9c525615b191a5ead10d2746f0c8e8060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 13:48:13 +0100 Subject: [PATCH 084/179] Commenting DistributedMatrixTest of getCompressedRowLength - it does not work with the new meaning of the method. 
--- src/UnitTests/Matrices/DistributedMatrixTest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h index a1a9f3eb8..9487e5977 100644 --- a/src/UnitTests/Matrices/DistributedMatrixTest.h +++ b/src/UnitTests/Matrices/DistributedMatrixTest.h @@ -171,7 +171,7 @@ TYPED_TEST( DistributedMatrixTest, getCompressedRowLengths ) this->matrix.setCompressedRowLengths( this->rowLengths ); RowLengthsVector output; - this->matrix.getCompressedRowLengths( output ); + this->matrix.getCompressedRowLengths( output ); // TODO: replace this with getRowCapacities EXPECT_EQ( output, this->rowLengths ); } -- GitLab From 7104d153305e1f8e3ee9aae3ead38db516cb5577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 13:50:25 +0100 Subject: [PATCH 085/179] Renaming Matrix::getNumberOfMetrixElements to getAllocatedElementdCount. --- src/Python/pytnl/tnl/SparseMatrix.h | 2 +- src/TNL/Matrices/Legacy/Sparse_impl.h | 2 +- src/TNL/Matrices/MatrixReader_impl.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 03ec5814c..b4cc0fc1a 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -72,7 +72,7 @@ void export_Matrix( py::module & m, const char* name ) .def("getCompressedRowLengths", _getCompressedRowLengths) // TODO: export for more types .def("setLike", &Matrix::template setLike< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >) - .def("getNumberOfMatrixElements", &Matrix::getNumberOfMatrixElements) + .def("getAllocatedElementsCount", &Matrix::getAllocatedElementsCount) .def("getNumberOfNonzeroMatrixElements", &Matrix::getNumberOfNonzeroMatrixElements) .def("reset", &Matrix::reset) .def("getRows", &Matrix::getRows) diff --git 
a/src/TNL/Matrices/Legacy/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h index 889d92e62..3e4794412 100644 --- a/src/TNL/Matrices/Legacy/Sparse_impl.h +++ b/src/TNL/Matrices/Legacy/Sparse_impl.h @@ -33,7 +33,7 @@ template< typename Real, void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix ) { Matrix< Real, Device, Index >::setLike( matrix ); - this->allocateMatrixElements( matrix.getNumberOfMatrixElements() ); + this->allocateMatrixElements( matrix.getAllocatedElementsCount() ); } diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index 476a7327e..a80d00283 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -340,7 +340,7 @@ void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, long int fileSize = file.tellg(); timer.stop(); if( verbose ) - std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements() + std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getAllocatedElementsCount() << " -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl; } diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 6189d43d3..3f5636bb6 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -412,6 +412,9 @@ vectorProduct( const InVector& inVector, const RealType& matrixMultiplicator, const RealType& inVectorAddition ) const { + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." 
); + const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); -- GitLab From 43533ef1b605924e2bdc6ee4b36021b1c6a6b49b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 13:52:16 +0100 Subject: [PATCH 086/179] Making MatrixView::getValues() __cuda_callable__. --- src/TNL/Matrices/MatrixView.h | 2 ++ src/TNL/Matrices/MatrixView.hpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index b8adfd791..467d02349 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -85,8 +85,10 @@ public: virtual Real getElement( const IndexType row, const IndexType column ) const = 0; + __cuda_callable__ const ValuesView& getValues() const; + __cuda_callable__ ValuesView& getValues(); /** diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index b2739ae1d..275a22870 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -102,6 +102,7 @@ Index MatrixView< Real, Device, Index >::getColumns() const template< typename Real, typename Device, typename Index > +__cuda_callable__ const typename MatrixView< Real, Device, Index >::ValuesView& MatrixView< Real, Device, Index >:: getValues() const @@ -112,6 +113,7 @@ getValues() const template< typename Real, typename Device, typename Index > +__cuda_callable__ typename MatrixView< Real, Device, Index >::ValuesView& MatrixView< Real, Device, Index >:: getValues() -- GitLab From 897b35c3b1548f07fc60962592765a025d88d074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 13:53:20 +0100 Subject: [PATCH 087/179] Commenting out dated implementation of Matrix::getCompressedRowLengths. 
--- src/TNL/Matrices/Matrix.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index efd26e1fa..4ddbacde5 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -72,9 +72,9 @@ template< typename Real, typename RealAllocator > void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { - TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); - for( IndexType row = 0; row < this->getRows(); row++ ) - rowLengths.setElement( row, this->getRowLength( row ) ); + //TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + //for( IndexType row = 0; row < this->getRows(); row++ ) + // rowLengths.setElement( row, this->getRowLength( row ) ); } template< typename Real, -- GitLab From e9fc173c4ebc43475983c7dfbadcc39ed44bb8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 11 Jan 2020 17:13:53 +0100 Subject: [PATCH 088/179] Moving Multidiagonal matrix to Legacy. 
--- src/TNL/Matrices/{ => Legacy}/Multidiagonal.h | 4 ++-- src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter.h | 4 ++-- .../Matrices/{ => Legacy}/MultidiagonalMatrixSetter_impl.h | 0 src/TNL/Matrices/{ => Legacy}/MultidiagonalRow.h | 2 +- src/TNL/Matrices/{ => Legacy}/MultidiagonalRow_impl.h | 0 src/TNL/Matrices/{ => Legacy}/Multidiagonal_impl.h | 2 +- src/TNL/Problems/HeatEquationProblem_impl.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) rename src/TNL/Matrices/{ => Legacy}/Multidiagonal.h (98%) rename src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter.h (96%) rename src/TNL/Matrices/{ => Legacy}/MultidiagonalMatrixSetter_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/MultidiagonalRow.h (96%) rename src/TNL/Matrices/{ => Legacy}/MultidiagonalRow_impl.h (100%) rename src/TNL/Matrices/{ => Legacy}/Multidiagonal_impl.h (99%) diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Legacy/Multidiagonal.h similarity index 98% rename from src/TNL/Matrices/Multidiagonal.h rename to src/TNL/Matrices/Legacy/Multidiagonal.h index 1ee6a25e9..d9f1379f7 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Legacy/Multidiagonal.h @@ -12,7 +12,7 @@ #include #include -#include +#include namespace TNL { namespace Matrices { @@ -221,4 +221,4 @@ protected: } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h similarity index 96% rename from src/TNL/Matrices/MultidiagonalMatrixSetter.h rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h index c10d0cc57..f9e7ef135 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixSetter.h +++ b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h @@ -11,7 +11,7 @@ #pragma once #include -#include +#include namespace TNL { namespace Matrices { @@ -85,4 +85,4 @@ class MultidiagonalMatrixSetter< Meshes::Grid< 3, MeshReal, Device, MeshIndex > } // namespace Matrices } // 
namespace TNL -#include +#include diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h similarity index 100% rename from src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h diff --git a/src/TNL/Matrices/MultidiagonalRow.h b/src/TNL/Matrices/Legacy/MultidiagonalRow.h similarity index 96% rename from src/TNL/Matrices/MultidiagonalRow.h rename to src/TNL/Matrices/Legacy/MultidiagonalRow.h index 1d465d229..c41541ead 100644 --- a/src/TNL/Matrices/MultidiagonalRow.h +++ b/src/TNL/Matrices/Legacy/MultidiagonalRow.h @@ -54,5 +54,5 @@ class MultidiagonalRow } // namespace Matrices } // namespace TNL -#include +#include diff --git a/src/TNL/Matrices/MultidiagonalRow_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h similarity index 100% rename from src/TNL/Matrices/MultidiagonalRow_impl.h rename to src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h similarity index 99% rename from src/TNL/Matrices/Multidiagonal_impl.h rename to src/TNL/Matrices/Legacy/Multidiagonal_impl.h index 76f54f748..375e01c6d 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h @@ -10,7 +10,7 @@ #pragma once -#include +#include #include #include #include diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h index bc339e9b3..98cd6d5e4 100644 --- a/src/TNL/Problems/HeatEquationProblem_impl.h +++ b/src/TNL/Problems/HeatEquationProblem_impl.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include -- GitLab From f40eb2d70fa1cd5f8ba729b93ca43df273f912cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 12 Jan 2020 13:03:39 +0100 Subject: [PATCH 089/179] Fixing tridiagonal matrix unit tests comments. 
--- .../Matrices/TridiagonalMatrixTest.h | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index 2c476670b..d9dc06599 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -182,7 +182,7 @@ void test_GetNumberOfNonzeroMatrixElements() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 7x6 dense matrix: + * Sets up the following 7x6 matrix: * * / 0 1 0 0 0 0 \ * | 2 3 4 0 0 0 | @@ -215,7 +215,7 @@ void test_Reset() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 0 0 0 0 \ * | 0 0 0 0 | @@ -242,7 +242,7 @@ void test_SetValue() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 7x6 dense matrix: + * Sets up the following 7x6 matrix: * * / 0 1 0 0 0 0 \ * | 2 3 4 0 0 0 | @@ -374,7 +374,7 @@ void test_SetElement() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x5 dense matrix: + * Sets up the following 5x5 matrix: * * / 1 2 0 0 0 \ * | 6 7 8 0 0 | @@ -438,7 +438,7 @@ void test_AddElement() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 6x5 dense matrix: + * Sets up the following 6x5 matrix: * * / 1 2 0 0 0 \ * | 6 7 8 0 0 | @@ -501,7 +501,7 @@ void test_AddElement() // Add new elements to the old elements with a multiplying factor applied to the old elements. 
/* - * The following setup results in the following 6x5 dense matrix: + * The following setup results in the following 6x5 matrix: * * / 1 2 0 0 0 \ / 1 2 0 0 0 \ / 3 6 0 0 0 \ * | 6 7 8 0 0 | | 3 4 5 0 0 | | 15 18 21 0 0 | @@ -563,7 +563,7 @@ void test_SetRow() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 3x7 dense matrix: + * Sets up the following 3x7 matrix: * * / 1 2 0 0 0 0 0 \ * | 8 9 10 0 0 0 0 | @@ -622,7 +622,7 @@ void test_AddRow() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 6x5 dense matrix: + * Sets up the following 6x5 matrix: * * / 1 2 0 0 0 \ * | 6 7 8 0 0 | @@ -759,7 +759,7 @@ void test_VectorProduct() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 1 2 0 0 \ * | 5 6 7 0 | @@ -806,7 +806,7 @@ void test_AddMatrix() using IndexType = typename Matrix1::IndexType; /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 1 2 0 0 \ * | 5 6 7 0 | @@ -829,7 +829,7 @@ void test_AddMatrix() } /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 1 2 0 0 \ * | 3 4 5 0 | @@ -846,7 +846,7 @@ void test_AddMatrix() m2.setElement( i, j, newValue++ ); /* - * Compute the following 5x4 dense matrix: + * Compute the following 5x4 matrix: * * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | @@ -924,7 +924,7 @@ void test_GetMatrixProduct() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 1 2 3 4 \ * | 5 6 7 8 | @@ -945,7 +945,7 @@ void test_GetMatrixProduct() leftMatrix.setElement( i, j, value++ ); /* - * Sets up the following 4x5 dense matrix: + * Sets up the following 4x5 matrix: * * / 1 2 3 4 5 \ * | 6 7 8 9 10 | @@ -965,7 +965,7 @@ void 
test_GetMatrixProduct() rightMatrix.setElement( i, j, newValue++ ); /* - * Sets up the following 5x5 resulting dense matrix: + * Sets up the following 5x5 resulting matrix: * * / 0 0 0 0 \ * | 0 0 0 0 | @@ -1029,7 +1029,7 @@ void test_GetTransposition() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 3x2 dense matrix: + * Sets up the following 3x2 matrix: * * / 1 2 \ * | 3 4 | @@ -1050,7 +1050,7 @@ void test_GetTransposition() m.print( std::cout ); /* - * Sets up the following 2x3 dense matrix: + * Sets up the following 2x3 matrix: * * / 0 0 0 \ * \ 0 0 0 / @@ -1068,7 +1068,7 @@ void test_GetTransposition() mTransposed.print( std::cout ); /* - * Should result in the following 2x3 dense matrix: + * Should result in the following 2x3 matrix: * * / 1 3 5 \ * \ 2 4 6 / @@ -1091,7 +1091,7 @@ void test_PerformSORIteration() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 4x4 dense matrix: + * Sets up the following 4x4 matrix: * * / 4 1 1 1 \ * | 1 4 1 1 | @@ -1217,7 +1217,7 @@ void test_SaveAndLoad() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 4x4 dense matrix: + * Sets up the following 4x4 matrix: * * / 1 2 0 0 \ * | 5 6 7 0 | -- GitLab From bac4cdfab2edee1613c286047ce7e38d787064f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 12 Jan 2020 13:04:38 +0100 Subject: [PATCH 090/179] Revision of multidiagonal matrix. 
--- src/TNL/Matrices/Multidiagonal.h | 217 +++ src/TNL/Matrices/Multidiagonal.hpp | 909 +++++++++ src/TNL/Matrices/MultidiagonalMatrixRowView.h | 59 + .../Matrices/MultidiagonalMatrixRowView.hpp | 75 + src/TNL/Matrices/MultidiagonalMatrixView.h | 181 ++ src/TNL/Matrices/MultidiagonalMatrixView.hpp | 729 +++++++ src/TNL/Matrices/TridiagonalMatrixView.h | 3 - .../details/MultidiagonalMatrixIndexer.h | 106 ++ src/UnitTests/Matrices/CMakeLists.txt | 12 +- .../Matrices/MultidiagonalMatrixTest.cpp | 2 +- .../Matrices/MultidiagonalMatrixTest.cu | 2 +- .../Matrices/MultidiagonalMatrixTest.h | 1679 +++++++++-------- 12 files changed, 3169 insertions(+), 805 deletions(-) create mode 100644 src/TNL/Matrices/Multidiagonal.h create mode 100644 src/TNL/Matrices/Multidiagonal.hpp create mode 100644 src/TNL/Matrices/MultidiagonalMatrixRowView.h create mode 100644 src/TNL/Matrices/MultidiagonalMatrixRowView.hpp create mode 100644 src/TNL/Matrices/MultidiagonalMatrixView.h create mode 100644 src/TNL/Matrices/MultidiagonalMatrixView.hpp create mode 100644 src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h new file mode 100644 index 000000000..5d23cd960 --- /dev/null +++ b/src/TNL/Matrices/Multidiagonal.h @@ -0,0 +1,217 @@ +/*************************************************************************** + Multidiagonal.h - description + ------------------- + begin : Oct 13, 2011 + copyright : (C) 2011 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename 
Allocators::Default< Device >::template Allocator< Real >, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > +{ + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using IndexAllocatorType = IndexAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesType = typename BaseType::ValuesVector; + using ValuesViewType = typename ValuesType::ViewType; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + + using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; + using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; + using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; + + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Multidiagonal< _Real, _Device, _Index >; + + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + + Multidiagonal(); + + Multidiagonal( const IndexType rows, + const 
IndexType columns ); + + template< typename Vector > + Multidiagonal( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ); + + ViewType getView() const; // TODO: remove const + + //ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector > + void setDimensions( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ); + + //template< typename Vector > + void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); + + const IndexType& getDiagonalsCount() const; + + const DiagonalsShiftsType& getDiagonalsShifts() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + IndexType getNonemptyRowsCount() const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + + RowView getRow( const IndexType& rowIdx ); + + const RowView getRow( const IndexType& rowIdx ) const; + + void setValue( const RealType& v ); + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + bool addElement( const IndexType row, + const 
IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + RealType getElement( const IndexType row, + const IndexType column ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + // copy assignment + Multidiagonal& operator=( const Multidiagonal& matrix ); + + // 
cross-device copy assignment + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ); + + void save( File& file ) const; + + void load( File& file ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void print( std::ostream& str ) const; + + const IndexerType& getIndexer() const; + + IndexerType& getIndexer(); + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; + + DiagonalsShiftsType diagonalsShifts; + + HostDiagonalsShiftsType hostDiagonalsShifts; + + IndexerType indexer; + + ViewType view; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp new file mode 100644 index 000000000..95f6667c1 --- /dev/null +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -0,0 +1,909 @@ +/*************************************************************************** + Multidiagonal.hpp - description + ------------------- + begin : Oct 13, 2011 + copyright : (C) 2011 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Device > +class MultidiagonalDeviceDependentCode; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename 
Vector > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ) +{ + this->setDimensions( rows, columns, diagonalsShifts ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getView() const -> ViewType +{ + // TODO: fix when getConstView works + return ViewType( const_cast< Multidiagonal* >( this )->values.getView(), + const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(), + indexer ); +} + +/*template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), indexer ); +}*/ + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +String +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getSerializationType() +{ + return String( "Matrices::Multidiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator], [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +String +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ) +{ + Matrix< Real, Device, Index >::setDimensions( rows, columns ); + this->diagonalsShifts = diagonalsShifts; + this->hostDiagonalsShifts = diagonalsShifts; + const IndexType minShift = min( diagonalsShifts ); + IndexType nonemptyRows = min( rows, columns ); + if( rows > columns && minShift < 0 ) + nonemptyRows = min( rows, nonemptyRows - minShift ); + this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows ); + this->values.setSize( this->indexer.getStorageSize() ); + this->values = 0.0; + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + // template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) +{ + if( max( rowLengths ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( rowLengths.getElement( 0 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); + const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); + if( this->getRows() > this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 1 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() == this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() < this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +const Index& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getDiagonalsCount() const +{ + return this->view.getDiagonalsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getDiagonalsShifts() const -> const DiagonalsShiftsType& +{ + return this->diagonalsShifts; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + return this->view.getCompressedRowLengths( rowLengths ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getNonemptyRowsCount() const +{ + return 
this->indexer.getNonemptyRowsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRowLength( const IndexType row ) const +{ + return this->view.getRowLength( row ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getMaxRowLength() const +{ + return this->view.getMaxRowLength(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +{ + this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getNumberOfNonzeroMatrixElements() const +{ + return this->view.getNumberOfNonzeroMatrixElements(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +reset() +{ + Matrix< Real, Device, Index >::reset(); + this->values.reset(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< 
typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + return ! this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setValue( const RealType& v ) +{ + this->view.setValue( v ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool 
RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + return this->view.setElement( row, column, value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + return this->view.addElement( row, column, value, thisElementMultiplicator ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Real +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getElement( const IndexType row, const IndexType column ) const +{ + return this->view.getElement( row, column ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const 
FetchReal& zero ) const +{ + this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + this->view.forRows( first, last, function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + this->view.forRows( first, last, function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) const +{ + this->view.forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) +{ + this->view.forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const +{ + return 
this->view.rowVectorProduct(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename InVector, + typename OutVector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const +{ + this->view.vectorProduct( inVector, outVector ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator ); +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Multidiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +} +#endif + +template< 
typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real2, typename Index2 > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + Multidiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Multidiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE; +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< 
typename Vector1, typename Vector2 > +__cuda_callable__ +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +// copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix ) +{ + this->setLike( matrix ); + this->values = matrix.values; + return *this; +} + +// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) +{ + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, + "unknown device" ); + static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, + "unknown device" ); + + this->setLike( matrix ); + if( RowMajorOrder == RowMajorOrder_ ) + this->values = matrix.getValues(); + else + { + if( std::is_same< Device, Device_ 
>::value ) + { + const auto matrix_view = matrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + else + { + Multidiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; + auxMatrix = matrix; + const auto matrix_view = auxMatrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( File& file ) const +{ + Matrix< Real, Device, Index >::save( file ); + file << diagonalsShifts; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( File& file ) +{ + Matrix< Real, Device, Index >::load( file ); + file >> this->diagonalsShifts; + this->hostDiagonalsShifts = this->diagonalsShifts; + const IndexType minShift = min( diagonalsShifts ); + IndexType nonemptyRows = min( this->getRows(), this->getColumns() ); + if( this->getRows() > this->getColumns() && minShift < 0 ) + nonemptyRows = min( this->getRows(), nonemptyRows - minShift ); + this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows ); + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > 
+void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = row - 1; column < row + 2; column++ ) + if( column >= 0 && column < this->columns ) + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + IndexType localIdx = column - row; + 
if( row > 0 ) + localIdx++; + + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + + return this->indexer.getGlobalIndex( row, localIdx ); +} + +/* +template<> +class MultidiagonalDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return 2*row + column; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ 1 ]; + Index i = 3 * row; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ]; + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ] + + vector[ row + 1 ] * values[ i + 1 ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class MultidiagonalDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return ( column - row + 1 )*rows + row - 1; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, 
+ const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ rows - 1 ]; + Index i = row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ]; + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ] + + vector[ row + 1 ] * values[ i + 2*rows ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + */ + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h new file mode 100644 index 000000000..68b5be55c --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h @@ -0,0 +1,59 @@ +/*************************************************************************** + MultidiagonalMatrixRowView.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, + typename Indexer > +class MultidiagonalMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using IndexType = typename ValuesView::IndexType; + using ValuesViewType = ValuesView; + using IndexerType = Indexer; + + __cuda_callable__ + MultidiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const IndexType getColumnIndex( 
const IndexType localIdx ) const; + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const RealType& value ); + protected: + + IndexType rowIdx; + + ValuesViewType values; + + Indexer indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp new file mode 100644 index 000000000..349fbe8ea --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp @@ -0,0 +1,75 @@ +/*************************************************************************** + MultidiagonalMatrixRowView.hpp - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ) +: rowIdx( rowIdx ), values( values ), indexer( indexer ) +{ +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer >:: +getSize() const -> IndexType +{ + return indexer.getRowSize(); +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer >:: +getColumnIndex( const IndexType localIdx ) const -> const IndexType +{ + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + return rowIdx + localIdx - 1; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto 
+MultidiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) -> RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +void +MultidiagonalMatrixRowView< ValuesView, Indexer >:: +setElement( const IndexType localIdx, + const RealType& value ) +{ + this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value; +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h new file mode 100644 index 000000000..addeb18b3 --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -0,0 +1,181 @@ +/*************************************************************************** + MultidiagonalMatrixView.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > +{ + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; + using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >; + using DiagonalsShiftsView = 
typename DiagonalsShiftsType::ViewType; + using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; + // The host view is derived from the host vector type declared just above (it was aliased to the device view type before). + using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using ValuesViewType = typename BaseType::ValuesView; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > + using Self = MultidiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; + + MultidiagonalMatrixView(); + + MultidiagonalMatrixView( const ValuesViewType& values, + const DiagonalsShiftsView& diagonalsShifts, + const IndexerType& indexer ); + + ViewType getView(); + + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + __cuda_callable__ + const IndexType& getDiagonalsCount() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + IndexType getNonemptyRowsCount() const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + template< typename 
Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + RowView getRow( const IndexType& rowIdx ); + + const RowView getRow( const IndexType& rowIdx ) const; + + void setValue( const RealType& v ); + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + RealType getElement( const IndexType row, + const IndexType column ) const; + + MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view ); + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real_, 
typename Device_, typename Index_, bool RowMajorOrder_ > + void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + void save( File& file ) const; + + void save( const String& fileName ) const; + + void print( std::ostream& str ) const; + + __cuda_callable__ + const IndexerType& getIndexer() const; + + __cuda_callable__ + IndexerType& getIndexer(); + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; + + DiagonalsShiftsView diagonalsShifts; + + HostDiagonalsShiftsView hostDiagonalsShifts; + + IndexerType indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp new file mode 100644 index 000000000..3d9b0237f --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -0,0 +1,729 @@ +/*************************************************************************** + MultidiagonalMatrixView.hpp - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, 
Index, RowMajorOrder >:: +MultidiagonalMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView( const ValuesViewType& values, + const DiagonalsShiftsView& diagonalsShifts, + const IndexerType& indexer ) +: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), + diagonalsShifts( diagonalsShifts ), + indexer( indexer ) +{ +} + +/** + * \brief Returns a modifiable view of this matrix view. + * + * The view is built from the values view, the diagonals shifts view and the indexer, + * which is the only non-default constructor the class declares. + */ +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getView() -> ViewType +{ + return ViewType( this->values.getView(), this->diagonalsShifts, this->indexer ); +} + +/** + * \brief Returns a constant view of this matrix view. + * + * The view is built from the constant values view, the diagonals shifts view and the indexer. + */ +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), this->diagonalsShifts, this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return String( "Matrices::Multidiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +const Index& +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getDiagonalsCount() const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNonemptyRowsCount() const +{ + return this->indexer.getNonemptyRowsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRowLength( const IndexType row ) const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: 
+getMaxRowLength() const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +/** + * \brief Compares the stored values of this view with those of \e matrix. + * + * Only views with the same RowMajorOrder are supported. Comparison of views with + * different ordering is not implemented yet: the assertion fails in debug builds + * and \e false is returned otherwise, so the function never flows off its end. + * The values of \e matrix are read via getValues() (as addMatrix does) because + * \e matrix may be a different template instantiation. + */ +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.getValues(); + else + { + TNL_ASSERT( false, "TODO" ); + return false; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + return ! 
this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setValue( const RealType& v ) +{ + this->values = v; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return RowView( rowIdx, this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return RowView( rowIdx, this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } + this->values.setElement( this->getElementIndex( row, column ), value ); + return true; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, 
this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } + const Index i = this->getElementIndex( row, column ); + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); + return true; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Real +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, const IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); +} + +/** + * \brief Assigns \e view to this view. + * + * Assigns the MatrixView base, calls copy() on both diagonals shifts views + * and assigns the indexer. + * + * \return Reference to this view. + */ +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >& +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const MultidiagonalMatrixView& view ) +{ + MatrixView< Real, Device, Index >::operator=( view ); + this->diagonalsShifts.copy( view.diagonalsShifts ); + this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts ); + this->indexer = view.indexer; + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const +{ + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); + const auto values_view = this->values.getConstView(); + const auto indexer = this->indexer; + const auto zero = zero_; + auto f = [=] __cuda_callable__ ( 
IndexType rowIdx ) mutable { + Real_ sum( zero ); + if( rowIdx == 0 ) + { + reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); + reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + keep( 0, sum ); + return; + } + if( rowIdx + 1 < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); + keep( rowIdx, sum ); + return; + } + if( rowIdx < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + keep( rowIdx, sum ); + } + else + { + keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + const auto indexer = this->indexer; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + if( rowIdx == 0 ) + { + function( 0, 0, 0, values_view[ 
indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); + } + else if( rowIdx < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + } + else + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + const auto indexer = this->indexer; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + if( rowIdx == 0 ) + { + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); + } + else if( rowIdx < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + } + else + function( rowIdx, 0, rowIdx, values_view[ 
indexer.getGlobalIndex( rowIdx, 0 ) ] ); + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +/** + * \brief Calls \e function for the elements of all rows (constant variant). + * + * Forwards to forRows() over the row range [ 0, indexer.getNonEmptyRowsCount() ). + */ +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename InVector, + typename OutVector > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const +{ + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." 
); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { + return value * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); + TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." 
); + + if( RowMajorOrder == RowMajorOrder_ ) + { + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.getValues(); + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues(); + } + else + { + const auto matrix_view = matrix; + const auto matrixMult = matrixMultiplicator; + const auto thisMult = thisMatrixMultiplicator; + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + if( thisMult == 0.0 ) + this->forAllRows( add0 ); + else if( thisMult == 1.0 ) + this->forAllRows( add1 ); + else + this->forAllRows( addGen ); + } +} + +#ifdef HAVE_CUDA +/*template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Multidiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + 
matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +}*/ +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real2, typename Index2 > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + /*Multidiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Multidiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( 
kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE;*/ +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const +{ + MatrixView< Real, Device, Index >::save( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = row - 1; column < row + 2; column++ ) + if( column >= 0 && column < this->columns ) + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, 
+ bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + IndexType localIdx = column - row; + if( row > 0 ) + localIdx++; + + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + + return this->indexer.getGlobalIndex( row, localIdx ); +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 290062793..128b48494 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -59,9 +59,6 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > virtual String getSerializationTypeVirtual() const; - void setDimensions( const IndexType rows, - const IndexType columns ); - template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h new file mode 100644 index 000000000..0f0436d74 --- /dev/null +++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h @@ -0,0 +1,106 @@ +/*************************************************************************** + MultidiagonalMatrixIndexer.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Index, + bool RowMajorOrder > +class 
MultidiagonalMatrixIndexer +{ + public: + + using IndexType = Index; + + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + + __cuda_callable__ + MultidiagonalMatrixIndexer() + : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){}; + + __cuda_callable__ + MultidiagonalMatrixIndexer( const IndexType& rows, + const IndexType& columns, + const IndexType& diagonals, + const IndexType& nonemptyRows ) + : rows( rows ), + columns( columns ), + diagonals( diagonals ), + nonemptyRows( nonemptyRows ) {}; + + __cuda_callable__ + MultidiagonalMatrixIndexer( const MultidiagonalMatrixIndexer& indexer ) + : rows( indexer.rows ), + columns( indexer.columns ), + diagonals( indexer.diagonals ), + nonemptyRows( indexer.nonemptyRows ) {}; + + void set( const IndexType& rows, + const IndexType& columns, + const IndexType& diagonals, + const IndexType& nonemptyRows ) + { + this->rows = rows; + this->columns = columns; + this->diagonals = diagonals; + this->nonemptyRows = nonemptyRows; + }; + + /*__cuda_callable__ + IndexType getRowSize( const IndexType rowIdx ) const + { + if( rowIdx == 0 ) + return 2; + if( columns <= rows ) + { + if( rowIdx == columns - 1 ) + return 2; + if( rowIdx == columns ) + return 1; + } + return 3; + };*/ + + __cuda_callable__ + const IndexType& getRows() const { return this->rows; }; + + __cuda_callable__ + const IndexType& getColumns() const { return this->columns; }; + + __cuda_callable__ + const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; }; + + __cuda_callable__ + IndexType getStorageSize() const { return diagonals * this->nonemptyRows; }; + + __cuda_callable__ + IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const + { + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, diagonals, "" ); + TNL_ASSERT_GE( rowIdx, 0, "" ); + TNL_ASSERT_LT( rowIdx, this->rows, "" ); + + if( RowMajorOrder ) + return diagonals * rowIdx + localIdx; + else + return localIdx * nonemptyRows + rowIdx; + }; + + 
protected: + + IndexType rows, columns, diagonals, nonemptyRows; +}; + } //namespace details + } // namespace Materices +} // namespace TNL diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 4b95380c4..287495405 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -13,8 +13,8 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) -# CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) -# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) @@ -42,9 +42,9 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) -# ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) -# TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) -# TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) @@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest 
${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp index 73406d0df..639f19640 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp @@ -1,7 +1,7 @@ /*************************************************************************** MultidiagonalMatrixTest.cpp - description ------------------- - begin : Jan 9, 2020 + begin : Jan 8, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu index e3dab545c..53541edbd 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu @@ -1,7 +1,7 @@ /*************************************************************************** MultidiagonalMatrixTest.cu - description ------------------- - begin : Jan 9, 2020 + begin : Jan 8, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index abe6b64c5..cb9916e4c 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -8,6 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ +#include #include #include #include @@ -33,55 +34,110 @@ static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; void test_GetSerializationType() { - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], 
int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], 
[any_allocator] >" ) ); } template< typename Matrix > void test_SetDimensions() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - const IndexType rows = 9; - const IndexType cols = 8; + const IndexType rows = 9; + const IndexType cols = 8; + const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m; + m.setDimensions( rows, cols, diagonalsShifts ); - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); } + template< typename Matrix1, typename Matrix2 > void test_SetLike() { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType; - const IndexType rows = 8; - const IndexType cols = 7; + const IndexType rows = 8; + const IndexType cols = 7; + const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; - Matrix1 m1; - m1.reset(); - m1.setDimensions( rows + 1, cols + 2 ); + Matrix1 m1; + m1.setDimensions( rows + 1, cols + 2, diagonalsShifts ); - Matrix2 m2; - m2.reset(); - m2.setDimensions( rows, cols ); + Matrix2 m2; + m2.setDimensions( rows, cols, diagonalsShifts ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNonemptyRowsCount() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename 
Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x8 matrix: + * + * / 1 0 0 1 0 1 0 0 \ + * | 0 1 0 0 1 0 1 0 | + * | 1 0 1 0 0 1 0 1 | + * | 0 1 0 1 0 0 1 0 | + * \ 0 0 1 0 1 0 0 1 / + */ + Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m1.setValue( 1.0 ); + EXPECT_EQ( m1.getNonemptyRowsCount(), 5 ); + + /* + * Sets up the following 5x5 matrix: + * + * / 1 0 0 1 0 \ + * | 0 1 0 0 1 | + * | 1 0 1 0 0 | + * | 0 1 0 1 0 | + * \ 0 0 1 0 1 / + */ + Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m2.setValue( 1.0 ); + EXPECT_EQ( m2.getNonemptyRowsCount(), 5 ); - m1.setLike( m2 ); + /* + * Sets up the following 8x5 matrix: + * + * / 1 0 0 1 0 \ + * | 0 1 0 0 1 | + * | 1 0 1 0 0 | + * | 0 1 0 1 0 | + * | 0 0 1 0 1 | + * | 0 0 0 1 0 | + * | 0 0 0 0 1 | + * \ 0 0 0 0 0 / + */ + Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m3.setValue( 1.0 ); + EXPECT_EQ( m3.getNonemptyRowsCount(), 7 ); - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } template< typename Matrix > @@ -90,463 +146,470 @@ void test_GetCompressedRowLengths() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; const IndexType rows = 10; const IndexType cols = 11; - Matrix m( rows, cols ); + Matrix m( rows, cols ); - // Insert values into the rows. - RealType value = 1; + // Insert values into the rows. 
+ RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); + for( IndexType i = 0; i < 2; i++ ) // 0th row -> 2 elements + m.setElement( 0, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 1st row - m.setElement( 1, i, value++ ); + for( IndexType i = 0; i < 3; i++ ) // 1st row -> 3 elements + m.setElement( 1, i, value++ ); - for( IndexType i = 0; i < 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); + for( IndexType i = 1; i < 3; i++ ) // 2nd row -> 2 elements + m.setElement( 2, i, value++ ); - for( IndexType i = 0; i < 2; i++ ) // 3rd row - m.setElement( 3, i, value++ ); + for( IndexType i = 2; i < 5; i++ ) // 3rd row -> 3 elements + m.setElement( 3, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 4th row - m.setElement( 4, i, value++ ); + for( IndexType i = 3; i < 6; i++ ) // 4th row -> 3 elements + m.setElement( 4, i, value++ ); - for( IndexType i = 0; i < 4; i++ ) // 5th row - m.setElement( 5, i, value++ ); + for( IndexType i = 4; i < 6; i++ ) // 5th row -> 2 elements + m.setElement( 5, i, value++ ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m.setElement( 6, i, value++ ); + for( IndexType i = 5; i < 8; i++ ) // 6th row -> 3 elements + m.setElement( 6, i, value++ ); - for( IndexType i = 0; i < 6; i++ ) // 7th row - m.setElement( 7, i, value++ ); + for( IndexType i = 6; i < 8; i++ ) // 7th row -> 2 elements + m.setElement( 7, i, value++ ); - for( IndexType i = 0; i < 7; i++ ) // 8th row - m.setElement( 8, i, value++ ); + for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements + m.setElement( 8, i, value++ ); - for( IndexType i = 0; i < 8; i++ ) // 9th row - m.setElement( 9, i, value++ ); + for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements + m.setElement( 9, i, value++ ); - typename Matrix::CompressedRowLengthsVector rowLengths; + typename Matrix::CompressedRowLengthsVector rowLengths( rows ); rowLengths = 0; m.getCompressedRowLengths( rowLengths ); - typename 
Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 }; EXPECT_EQ( rowLengths, correctRowLengths ); } template< typename Matrix > void test_GetRowLength() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - const IndexType rows = 8; - const IndexType cols = 7; + const IndexType rows = 8; + const IndexType cols = 7; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getRowLength( 0 ), 7 ); - EXPECT_EQ( m.getRowLength( 1 ), 7 ); - EXPECT_EQ( m.getRowLength( 2 ), 7 ); - EXPECT_EQ( m.getRowLength( 3 ), 7 ); - EXPECT_EQ( m.getRowLength( 4 ), 7 ); - EXPECT_EQ( m.getRowLength( 5 ), 7 ); - EXPECT_EQ( m.getRowLength( 6 ), 7 ); - EXPECT_EQ( m.getRowLength( 7 ), 7 ); + EXPECT_EQ( m.getRowLength( 0 ), 2 ); + EXPECT_EQ( m.getRowLength( 1 ), 3 ); + EXPECT_EQ( m.getRowLength( 2 ), 3 ); + EXPECT_EQ( m.getRowLength( 3 ), 3 ); + EXPECT_EQ( m.getRowLength( 4 ), 3 ); + EXPECT_EQ( m.getRowLength( 5 ), 3 ); + EXPECT_EQ( m.getRowLength( 6 ), 2 ); + EXPECT_EQ( m.getRowLength( 7 ), 1 ); } template< typename Matrix > -void test_GetNumberOfMatrixElements() +void test_GetAllocatedElementsCount() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 7; - const IndexType cols = 6; + const IndexType rows = 7; + const IndexType cols = 6; - Matrix m; - m.reset(); - 
m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); + EXPECT_EQ( m.getAllocatedElementsCount(), 21 ); } template< typename Matrix > void test_GetNumberOfNonzeroMatrixElements() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 7x6 dense matrix: - * - * / 0 2 3 4 5 6 \ - * | 7 8 9 10 11 12 | - * | 13 14 15 16 17 18 | - * | 19 20 21 22 23 24 | - * | 25 26 27 28 29 30 | - * | 31 32 33 34 35 36 | - * \ 37 38 39 40 41 0 / - */ - const IndexType rows = 7; - const IndexType cols = 6; + /* + * Sets up the following 7x6 matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + const IndexType cols = 6; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); - m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. - m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. 
+ m.setElement( 5, 5, 0); - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); } template< typename Matrix > void test_Reset() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 0 0 0 0 \ - * | 0 0 0 0 | - * | 0 0 0 0 | - * | 0 0 0 0 | - * \ 0 0 0 0 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + /* + * Sets up the following 5x4 matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); - m.reset(); + m.reset(); - EXPECT_EQ( m.getRows(), 0 ); - EXPECT_EQ( m.getColumns(), 0 ); + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); } template< typename Matrix > void test_SetValue() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 7x6 dense matrix: - * - * / 1 2 3 4 5 6 \ - * | 7 8 9 10 11 12 | - * | 13 14 15 16 17 18 | - * | 19 20 21 22 23 24 | - * | 25 26 27 28 29 30 | - * | 31 32 33 34 35 36 | - * \ 37 38 39 40 41 42 / - */ - const IndexType rows = 7; - const IndexType cols = 6; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 7x6 matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + 
const IndexType cols = 6; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 7 ); - EXPECT_EQ( m.getElement( 1, 1 ), 8 ); - EXPECT_EQ( m.getElement( 1, 2 ), 9 ); - EXPECT_EQ( m.getElement( 1, 3 ), 10 ); - EXPECT_EQ( m.getElement( 1, 4 ), 11 ); - EXPECT_EQ( m.getElement( 1, 5 ), 12 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 13 ); - EXPECT_EQ( m.getElement( 2, 1 ), 14 ); - EXPECT_EQ( m.getElement( 2, 2 ), 15 ); - EXPECT_EQ( m.getElement( 2, 3 ), 16 ); - EXPECT_EQ( m.getElement( 2, 4 ), 17 ); - EXPECT_EQ( m.getElement( 2, 5 ), 18 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 19 ); - EXPECT_EQ( m.getElement( 3, 1 ), 20 ); - EXPECT_EQ( m.getElement( 3, 2 ), 21 ); - EXPECT_EQ( m.getElement( 3, 3 ), 22 ); - EXPECT_EQ( m.getElement( 3, 4 ), 23 ); - EXPECT_EQ( m.getElement( 3, 5 ), 24 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 25 ); - EXPECT_EQ( m.getElement( 4, 1 ), 26 ); - EXPECT_EQ( m.getElement( 4, 2 ), 27 ); - EXPECT_EQ( m.getElement( 4, 3 ), 28 ); - EXPECT_EQ( m.getElement( 4, 4 ), 29 ); - EXPECT_EQ( m.getElement( 4, 5 ), 30 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 31 ); - EXPECT_EQ( m.getElement( 5, 1 ), 32 ); - EXPECT_EQ( m.getElement( 5, 2 ), 33 ); - EXPECT_EQ( m.getElement( 5, 3 ), 34 ); - EXPECT_EQ( m.getElement( 5, 4 ), 35 ); - EXPECT_EQ( m.getElement( 5, 5 ), 36 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 37 ); - EXPECT_EQ( m.getElement( 6, 1 ), 38 ); - EXPECT_EQ( m.getElement( 6, 2 ), 39 ); - EXPECT_EQ( m.getElement( 6, 3 ), 40 ); - EXPECT_EQ( m.getElement( 6, 4 ), 41 ); - EXPECT_EQ( m.getElement( 6, 5 ), 42 ); - - // Set the values of all elements to a certain number - m.setValue( 42 ); - - 
EXPECT_EQ( m.getElement( 0, 0 ), 42 ); - EXPECT_EQ( m.getElement( 0, 1 ), 42 ); - EXPECT_EQ( m.getElement( 0, 2 ), 42 ); - EXPECT_EQ( m.getElement( 0, 3 ), 42 ); - EXPECT_EQ( m.getElement( 0, 4 ), 42 ); - EXPECT_EQ( m.getElement( 0, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 42 ); - EXPECT_EQ( m.getElement( 1, 1 ), 42 ); - EXPECT_EQ( m.getElement( 1, 2 ), 42 ); - EXPECT_EQ( m.getElement( 1, 3 ), 42 ); - EXPECT_EQ( m.getElement( 1, 4 ), 42 ); - EXPECT_EQ( m.getElement( 1, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 42 ); - EXPECT_EQ( m.getElement( 2, 1 ), 42 ); - EXPECT_EQ( m.getElement( 2, 2 ), 42 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 42 ); - EXPECT_EQ( m.getElement( 2, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 42 ); - EXPECT_EQ( m.getElement( 3, 1 ), 42 ); - EXPECT_EQ( m.getElement( 3, 2 ), 42 ); - EXPECT_EQ( m.getElement( 3, 3 ), 42 ); - EXPECT_EQ( m.getElement( 3, 4 ), 42 ); - EXPECT_EQ( m.getElement( 3, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 42 ); - EXPECT_EQ( m.getElement( 4, 1 ), 42 ); - EXPECT_EQ( m.getElement( 4, 2 ), 42 ); - EXPECT_EQ( m.getElement( 4, 3 ), 42 ); - EXPECT_EQ( m.getElement( 4, 4 ), 42 ); - EXPECT_EQ( m.getElement( 4, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 42 ); - EXPECT_EQ( m.getElement( 5, 1 ), 42 ); - EXPECT_EQ( m.getElement( 5, 2 ), 42 ); - EXPECT_EQ( m.getElement( 5, 3 ), 42 ); - EXPECT_EQ( m.getElement( 5, 4 ), 42 ); - EXPECT_EQ( m.getElement( 5, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 42 ); - EXPECT_EQ( m.getElement( 6, 1 ), 42 ); - EXPECT_EQ( m.getElement( 6, 2 ), 42 ); - EXPECT_EQ( m.getElement( 6, 3 ), 42 ); - EXPECT_EQ( m.getElement( 6, 4 ), 42 ); - EXPECT_EQ( m.getElement( 6, 5 ), 42 ); + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); + + m.setElement( 5, 5, 0); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( 
m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 5 ); + EXPECT_EQ( m.getElement( 2, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 8 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 10 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 11 ); + EXPECT_EQ( m.getElement( 4, 4 ), 12 ); + EXPECT_EQ( m.getElement( 4, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 14 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 16 ); + + // Set the values of all elements to a certain number + m.setValue( 42 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); + EXPECT_EQ( m.getElement( 0, 1 ), 42 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 
0 ), 42 ); + EXPECT_EQ( m.getElement( 1, 1 ), 42 ); + EXPECT_EQ( m.getElement( 1, 2 ), 42 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 42 ); + EXPECT_EQ( m.getElement( 2, 2 ), 42 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 42 ); + EXPECT_EQ( m.getElement( 3, 3 ), 42 ); + EXPECT_EQ( m.getElement( 3, 4 ), 42 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 42 ); + EXPECT_EQ( m.getElement( 4, 4 ), 42 ); + EXPECT_EQ( m.getElement( 4, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 42 ); + EXPECT_EQ( m.getElement( 5, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); } template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * \ 21 22 23 24 25 / - */ - const IndexType rows = 5; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - 
Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x5 matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * \ 0 0 0 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) > 1 ) + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + else m.setElement( i, j, value++ ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( 
m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); } template< typename Matrix > void test_AddElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 6x5 matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( 
m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. 
-/* - * The following setup results in the following 6x5 dense matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - RealType newValue = 1; - RealType multiplicator = 2; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 matrix: + * + * / 1 2 0 0 0 \ / 1 2 0 0 0 \ / 3 6 0 0 0 \ + * | 6 7 8 0 0 | | 3 4 5 0 0 | | 15 18 21 0 0 | + * 2 * | 0 12 13 14 0 | + | 0 6 7 8 0 | = | 0 30 33 36 0 | + * | 0 0 18 19 20 | | 0 0 9 10 11 | | 0 0 45 48 51 | + * | 0 0 0 24 25 | | 0 0 0 12 13 | | 0 0 0 60 63 | + * \ 0 0 0 0 30 / \ 0 0 0 0 14 / \ 0 0 0 0 74 / + */ + + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( abs( i - j ) <= 1 ) m.addElement( i, j, newValue++, multiplicator ); - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 12 ); - EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 18 ); - EXPECT_EQ( m.getElement( 1, 1 ), 21 ); - EXPECT_EQ( m.getElement( 1, 2 ), 24 ); - EXPECT_EQ( m.getElement( 1, 3 ), 27 ); - EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 33 ); - EXPECT_EQ( m.getElement( 2, 1 ), 36 ); - EXPECT_EQ( m.getElement( 2, 2 ), 39 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 48 ); - EXPECT_EQ( m.getElement( 3, 1 ), 51 ); - EXPECT_EQ( m.getElement( 3, 2 ), 54 ); - EXPECT_EQ( m.getElement( 3, 3 ), 57 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 63 ); - EXPECT_EQ( m.getElement( 4, 1 ), 66 ); - EXPECT_EQ( m.getElement( 4, 2 ), 69 ); - EXPECT_EQ( m.getElement( 4, 3 ), 72 ); - EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 78 ); - EXPECT_EQ( m.getElement( 5, 1 ), 81 ); - EXPECT_EQ( m.getElement( 5, 2 ), 84 ); - EXPECT_EQ( m.getElement( 5, 3 ), 87 ); - EXPECT_EQ( m.getElement( 5, 4 ), 90 ); + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + 
EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 15 ); + EXPECT_EQ( m.getElement( 1, 1 ), 18 ); + EXPECT_EQ( m.getElement( 1, 2 ), 21 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 30 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 36 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 45 ); + EXPECT_EQ( m.getElement( 3, 3 ), 48 ); + EXPECT_EQ( m.getElement( 3, 4 ), 51 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 60 ); + EXPECT_EQ( m.getElement( 4, 4 ), 63 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 74 ); } template< typename Matrix > @@ -557,63 +620,56 @@ void test_SetRow() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 3x7 dense matrix: + * Sets up the following 3x7 matrix: * - * / 1 2 3 4 5 6 7 \ - * | 8 9 10 11 12 13 14 | - * \ 15 16 17 18 19 20 21 / + * / 1 2 0 0 0 0 0 \ + * | 8 9 10 0 0 0 0 | + * \ 0 16 17 18 0 0 0 / */ const IndexType rows = 3; const IndexType cols = 7; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 3 ][ 5 ] { - { 11, 11, 11, 11, 11 }, - { 22, 22, 22, 22, 22 }, - { 33, 33, 33, 33, 33 } }; - IndexType columnIndexes[ 3 ][ 5 ] { - { 0, 1, 2, 3, 4 }, - { 0, 1, 2, 3, 4 }, - { 2, 3, 4, 5, 6 } }; + 
RealType values[ 3 ][ 3 ] { + { 1, 2, 0 }, + { 8, 9, 10 }, + { 16, 17, 18 } }; auto row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 5; i++ ) - row.setElement( i, values[ rowIdx ][ i ] ); + for( IndexType i = 0; i < 3; i++ ) + { + if( rowIdx == 0 && i > 1 ) + break; + row.setElement( i, values[ rowIdx ][ i ] ); + } }; TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 11 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - EXPECT_EQ( m.getElement( 0, 6 ), 7 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 22 ); - EXPECT_EQ( m.getElement( 1, 1 ), 22 ); - EXPECT_EQ( m.getElement( 1, 2 ), 22 ); - EXPECT_EQ( m.getElement( 1, 3 ), 22 ); - EXPECT_EQ( m.getElement( 1, 4 ), 22 ); - EXPECT_EQ( m.getElement( 1, 5 ), 13 ); - EXPECT_EQ( m.getElement( 1, 6 ), 14 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 8 ); + EXPECT_EQ( m.getElement( 1, 1 ), 9 ); + EXPECT_EQ( m.getElement( 1, 2 ), 10 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 16 ); - EXPECT_EQ( m.getElement( 2, 2 ), 33 ); - EXPECT_EQ( m.getElement( 2, 3 ), 33 ); - EXPECT_EQ( m.getElement( 2, 4 ), 33 ); - EXPECT_EQ( m.getElement( 2, 5 ), 33 ); - EXPECT_EQ( m.getElement( 2, 6 ), 33 ); + EXPECT_EQ( m.getElement( 2, 2 ), 17 ); + EXPECT_EQ( m.getElement( 2, 3 ), 18 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( 
m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); } template< typename Matrix > @@ -623,14 +679,14 @@ void test_AddRow() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 6x5 dense matrix: + * Sets up the following 6x5 matrix: * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / */ const IndexType rows = 6; @@ -641,68 +697,72 @@ void test_AddRow() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( 
m.getElement( 3, 4 ), 20 ); - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); EXPECT_EQ( m.getElement( 4, 3 ), 24 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); EXPECT_EQ( m.getElement( 5, 4 ), 30 ); // Add new elements to the old elements with a multiplying factor applied to the old elements. /* * The following setup results in the following 6x5 sparse matrix: * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / + * / 0 0 0 0 0 0 \ / 1 2 0 0 0 \ / 11 11 0 0 0 \ / 11 11 0 0 0 \ + * | 0 1 0 0 0 0 | | 6 7 8 0 0 | | 22 22 22 0 0 | | 28 29 30 0 0 | + * | 0 0 2 0 0 0 | * | 0 12 13 14 0 | + | 0 33 33 33 0 | = | 0 57 59 61 0 | + * | 0 0 0 3 0 0 | | 0 0 18 19 20 | | 0 0 44 44 44 | | 0 0 98 101 104 | + * | 0 0 0 0 4 0 | | 0 0 0 24 25 | | 0 0 0 55 55 | | 0 0 0 151 155 | + * \ 0 0 0 0 0 5 / \ 0 0 0 0 30 / \ 0 0 0 0 66 / \ 0 0 0 0 216 / */ auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 6 ][ 5 ] { - { 11, 11, 11, 11, 0 }, - { 22, 22, 22, 22, 0 }, - { 33, 33, 33, 33, 0 }, - { 44, 44, 44, 44, 0 }, - { 55, 55, 55, 55, 0 }, - { 66, 66, 66, 66, 0 } }; + RealType values[ 6 ][ 3 ] { + { 11, 11, 0 }, + { 22, 22, 22 }, + { 33, 33, 33 }, + { 44, 44, 44 }, + { 55, 55, 55 }, + { 66, 66, 66 } }; auto row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 5; i++ ) + for( IndexType i = 0; i < 3; i++ ) { RealType& val = row.getValue( i 
); val = rowIdx * val + values[ rowIdx ][ i ]; @@ -711,208 +771,207 @@ void test_AddRow() TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 28 ); - EXPECT_EQ( m.getElement( 1, 1 ), 29 ); - EXPECT_EQ( m.getElement( 1, 2 ), 30 ); - EXPECT_EQ( m.getElement( 1, 3 ), 31 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 55 ); - EXPECT_EQ( m.getElement( 2, 1 ), 57 ); - EXPECT_EQ( m.getElement( 2, 2 ), 59 ); - EXPECT_EQ( m.getElement( 2, 3 ), 61 ); - EXPECT_EQ( m.getElement( 2, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 92 ); - EXPECT_EQ( m.getElement( 3, 1 ), 95 ); - EXPECT_EQ( m.getElement( 3, 2 ), 98 ); - EXPECT_EQ( m.getElement( 3, 3 ), 101 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 139 ); - EXPECT_EQ( m.getElement( 4, 1 ), 143 ); - EXPECT_EQ( m.getElement( 4, 2 ), 147 ); - EXPECT_EQ( m.getElement( 4, 3 ), 151 ); - EXPECT_EQ( m.getElement( 4, 4 ), 100 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 196 ); - EXPECT_EQ( m.getElement( 5, 1 ), 201 ); - EXPECT_EQ( m.getElement( 5, 2 ), 206 ); - EXPECT_EQ( m.getElement( 5, 3 ), 211 ); - EXPECT_EQ( m.getElement( 5, 4 ), 150 ); + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 
2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 104 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 155 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 216 ); } template< typename Matrix > void test_VectorProduct() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - VectorType inVector; - inVector.setSize( 4 ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) - inVector.setElement( i, 2 ); + using 
VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - VectorType outVector; - outVector.setSize( 5 ); - for( IndexType j = 0; j < outVector.getSize(); j++ ) - outVector.setElement( j, 0 ); + VectorType inVector( 4 ); + inVector = 2; + VectorType outVector( 5 ); + outVector = 0; - m.vectorProduct( inVector, outVector); + m.vectorProduct( inVector, outVector); - EXPECT_EQ( outVector.getElement( 0 ), 20 ); - EXPECT_EQ( outVector.getElement( 1 ), 52 ); - EXPECT_EQ( outVector.getElement( 2 ), 84 ); - EXPECT_EQ( outVector.getElement( 3 ), 116 ); - EXPECT_EQ( outVector.getElement( 4 ), 148 ); + EXPECT_EQ( outVector.getElement( 0 ), 6 ); + EXPECT_EQ( outVector.getElement( 1 ), 36 ); + EXPECT_EQ( outVector.getElement( 2 ), 66 ); + EXPECT_EQ( outVector.getElement( 3 ), 62 ); + EXPECT_EQ( outVector.getElement( 4 ), 40 ); } -template< typename Matrix > +template< typename Matrix1, typename Matrix2 = Matrix1 > void test_AddMatrix() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix1 m( rows, cols ); -/* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 
19 20 / - */ + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - Matrix m2; - m2.reset(); - m2.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 3 4 5 0 | + * | 0 6 7 8 | + * | 0 0 9 10 | + * \ 0 0 0 11 / + */ + Matrix2 m2( rows, cols ); - RealType newValue = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++) + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + if( abs( i - j ) <= 1 ) m2.setElement( i, j, newValue++ ); - /* - * Sets up the following 5x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ + /* + * Compute the following 5x4 matrix: + * + * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ + * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | + * | 0 10 11 12 | + 2 * | 0 6 7 8 | = | 0 22 25 28 | + * | 0 0 15 16 | | 0 0 9 10 | | 0 0 33 36 | + * \ 0 0 0 20 / \ 0 0 0 11 / \ 0 0 0 42 / + */ - Matrix mResult; - mResult.reset(); - mResult.setDimensions( rows, cols ); - - mResult = m; - - RealType matrixMultiplicator = 2; - RealType thisMatrixMultiplicator = 1; - - mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); - EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); - EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); - EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + 
thisMatrixMultiplicator * m.getElement( 1, 0 ) ); - EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); - EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); - EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); - EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); - EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); - EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); - EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); - EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); - EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); - EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); - EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); - EXPECT_EQ( 
mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); - - EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); - EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); - EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); - EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); - - EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); - EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); - EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); - EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); - - EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); - EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); - EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); - EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); - - EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); - EXPECT_EQ( mResult.getElement( 3, 1 ), 42 ); - EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); - EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); - - EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); - EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); - EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); - EXPECT_EQ( mResult.getElement( 4, 3 ), 60 ); + Matrix1 mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 
0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * 
m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 11 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 14 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 17 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 22 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 25 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 28 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 42 ); } template< typename Matrix > @@ -922,7 +981,7 @@ void test_GetMatrixProduct() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x4 dense matrix: + * Sets up the following 5x4 matrix: * * / 1 2 3 4 \ * | 5 6 7 8 | @@ -943,7 +1002,7 @@ void test_GetMatrixProduct() leftMatrix.setElement( i, j, value++ ); /* - * Sets up the following 4x5 dense matrix: + * Sets up the following 4x5 matrix: * * / 1 2 3 4 5 \ * | 6 7 8 9 10 | @@ -963,7 +1022,7 @@ void test_GetMatrixProduct() rightMatrix.setElement( i, j, newValue++ ); /* - * Sets up the following 5x5 resulting dense matrix: + * Sets up the following 5x5 resulting matrix: * * / 0 0 0 0 \ * | 0 0 0 0 | @@ -1027,7 +1086,7 @@ void test_GetTransposition() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 3x2 dense matrix: + * Sets up the following 3x2 matrix: * * / 1 2 \ * | 3 4 | @@ 
-1048,7 +1107,7 @@ void test_GetTransposition() m.print( std::cout ); /* - * Sets up the following 2x3 dense matrix: + * Sets up the following 2x3 matrix: * * / 0 0 0 \ * \ 0 0 0 / @@ -1066,7 +1125,7 @@ void test_GetTransposition() mTransposed.print( std::cout ); /* - * Should result in the following 2x3 dense matrix: + * Should result in the following 2x3 matrix: * * / 1 3 5 \ * \ 2 4 6 / @@ -1089,7 +1148,7 @@ void test_PerformSORIteration() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; /* - * Sets up the following 4x4 dense matrix: + * Sets up the following 4x4 matrix: * * / 4 1 1 1 \ * | 1 4 1 1 | @@ -1164,43 +1223,44 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); - using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); MultidiagonalHost hostMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - hostMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + hostMatrix.setElement( i, j, i + j ); Matrix matrix( rows, columns ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) - { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else - EXPECT_EQ( matrix.getElement( i, j ), i + j ); - } + if( abs( 
i - j ) <= 1 ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); #ifdef HAVE_CUDA MultidiagonalCuda cudaMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - cudaMatrix.setElement( i, j, i + j ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; matrix = cudaMatrix; - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j < rows; j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) { - if( j > i ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else + if( abs( i - j ) <= 1 ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } #endif } @@ -1209,123 +1269,125 @@ void test_AssignmentOperator() template< typename Matrix > void test_SaveAndLoad() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 4x4 dense matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * \ 13 14 15 16 / - */ - const IndexType rows = 4; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * \ 0 0 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; - Matrix savedMatrix; - savedMatrix.reset(); - savedMatrix.setDimensions( rows, cols ); + Matrix savedMatrix( rows, cols ); - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - savedMatrix.setElement( i, j, value++ ); - - ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - - Matrix loadedMatrix; - 
loadedMatrix.reset(); - loadedMatrix.setDimensions( rows, cols ); - - ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); - EXPECT_EQ( 
savedMatrix.getElement( 2, 3 ), 12 ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + savedMatrix.setElement( i, j, value ); + value++; + } + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( 
savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); } template< typename Matrix > void test_Print() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 3 4 \ - * | 5 6 7 8 | - * | 9 10 11 12 | - * | 13 14 15 16 | - * \ 17 18 19 20 / - */ - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; - RealType value = 1; - for( IndexType i = 0; i < rows; i++) - for( IndexType j = 0; j < cols; j++) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - #include - std::stringstream printed; - std::stringstream couted; + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } - //change the underlying buffer and save the old buffer - auto old_buf = 
std::cout.rdbuf(printed.rdbuf()); + std::stringstream printed; + std::stringstream couted; - m.print( std::cout ); //all the std::cout goes to ss + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); - std::cout.rdbuf(old_buf); //reset + m.print( std::cout ); //all the std::cout goes to ss - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" - "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" - "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" - "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" - "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" + "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" + "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" + "Row: 3 -> Col:2->15\t Col:3->16\t\n" + "Row: 4 -> Col:3->20\t\n"; - EXPECT_EQ( printed.str(), couted.str() ); + EXPECT_EQ( printed.str(), couted.str() ); } // test fixture for typed tests @@ -1388,6 +1450,21 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } +TYPED_TEST( MatrixTest, getNonemptyRowsCountTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNonemptyRowsCount< MatrixType >(); +} + +/* +TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetCompressedRowLengths< MatrixType >(); +} + TYPED_TEST( MatrixTest, getRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1395,11 +1472,11 @@ TYPED_TEST( MatrixTest, getRowLengthTest ) test_GetRowLength< MatrixType >(); } -TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) +TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; - test_GetNumberOfMatrixElements< MatrixType >(); + test_GetAllocatedElementsCount< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) @@ -1465,6 
+1542,19 @@ TYPED_TEST( MatrixTest, addMatrixTest ) test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) +{ + using MatrixType = typename TestFixture::MatrixType; + + using RealType = typename MatrixType::RealType; + using DeviceType = typename MatrixType::DeviceType; + using IndexType = typename MatrixType::IndexType; + using RealAllocatorType = typename MatrixType::RealAllocatorType; + using MatrixType2 = TNL::Matrices::Multidiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >; + + test_AddMatrix< MatrixType, MatrixType2 >(); +} + TYPED_TEST( MatrixTest, assignmentOperatorTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1485,6 +1575,7 @@ TYPED_TEST( MatrixTest, printTest ) test_Print< MatrixType >(); } +*/ //// test_getType is not general enough yet. DO NOT TEST IT YET. -- GitLab From b20418121b70638c87ce82851777105c29e9e464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 13 Jan 2020 21:39:52 +0100 Subject: [PATCH 091/179] Debugging multidiagonal matrix. 
--- src/TNL/Matrices/Matrix.h | 4 +- src/TNL/Matrices/MatrixView.h | 4 +- src/TNL/Matrices/Multidiagonal.h | 10 +- src/TNL/Matrices/Multidiagonal.hpp | 19 +- src/TNL/Matrices/MultidiagonalMatrixRowView.h | 11 +- .../Matrices/MultidiagonalMatrixRowView.hpp | 35 +- src/TNL/Matrices/MultidiagonalMatrixView.h | 7 +- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 172 +++-- src/TNL/Matrices/Tridiagonal.h | 4 +- src/TNL/Matrices/Tridiagonal.hpp | 17 +- src/TNL/Matrices/TridiagonalMatrixView.h | 4 +- src/TNL/Matrices/TridiagonalMatrixView.hpp | 10 +- .../details/MultidiagonalMatrixIndexer.h | 3 + .../Matrices/MultidiagonalMatrixTest.h | 666 ++++++++---------- 14 files changed, 453 insertions(+), 513 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 7813fa962..ebe7ccc21 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -76,11 +76,11 @@ public: __cuda_callable__ IndexType getColumns() const; - virtual bool setElement( const IndexType row, + virtual void setElement( const IndexType row, const IndexType column, const RealType& value ) = 0; - virtual bool addElement( const IndexType row, + virtual void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ) = 0; diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 467d02349..2a6429df5 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -73,11 +73,11 @@ public: * in the future and it does not slow down, declare them as virtual here. 
*/ - virtual bool setElement( const IndexType row, + virtual void setElement( const IndexType row, const IndexType column, const RealType& value ) = 0; - virtual bool addElement( const IndexType row, + virtual void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ) = 0; diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 5d23cd960..1741c0c75 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -38,12 +38,12 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > using ValuesType = typename BaseType::ValuesVector; using ValuesViewType = typename ValuesType::ViewType; using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; - using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >; + using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; - using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; - using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; @@ -119,11 +119,11 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > void setValue( const RealType& v ); - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( 
const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 95f6667c1..53e3c7f2f 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -44,6 +44,7 @@ Multidiagonal( const IndexType rows, const IndexType columns, const Vector& diagonalsShifts ) { + TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." ); this->setDimensions( rows, columns, diagonalsShifts ); } @@ -60,6 +61,7 @@ getView() const -> ViewType // TODO: fix when getConstView works return ViewType( const_cast< Multidiagonal* >( this )->values.getView(), const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(), + const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(), indexer ); } @@ -358,11 +360,11 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator, typename IndexAllocator > -bool +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { - return this->view.setElement( row, column, value ); + this->view.setElement( row, column, value ); } template< typename Real, @@ -371,14 +373,14 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator, typename IndexAllocator > -bool +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ) { - return this->view.addElement( row, column, value, thisElementMultiplicator ); + this->view.addElement( row, column, value, thisElementMultiplicator ); } template< typename Real, @@ -745,14 +747,7 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, 
IndexAllocator >:: print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - for( IndexType column = row - 1; column < row + 2; column++ ) - if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; - str << std::endl; - } + this->view.print( str ); } template< typename Real, diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h index 68b5be55c..0825d6fb3 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixRowView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h @@ -14,7 +14,8 @@ namespace TNL { namespace Matrices { template< typename ValuesView, - typename Indexer > + typename Indexer, + typename DiagonalsShiftsView_ > class MultidiagonalMatrixRowView { public: @@ -23,11 +24,13 @@ class MultidiagonalMatrixRowView using IndexType = typename ValuesView::IndexType; using ValuesViewType = ValuesView; using IndexerType = Indexer; + using DiagonalsShiftsView = DiagonalsShiftsView_; __cuda_callable__ MultidiagonalMatrixRowView( const IndexType rowIdx, - const ValuesViewType& values, - const IndexerType& indexer ); + const DiagonalsShiftsView& diagonalsShifts, + const ValuesViewType& values, + const IndexerType& indexer); __cuda_callable__ IndexType getSize() const; @@ -48,6 +51,8 @@ class MultidiagonalMatrixRowView IndexType rowIdx; + DiagonalsShiftsView diagonalsShifts; + ValuesViewType values; Indexer indexer; diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp index 349fbe8ea..88aae3f15 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp @@ -13,58 +13,59 @@ namespace TNL { namespace Matrices { -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ 
-MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: MultidiagonalMatrixRowView( const IndexType rowIdx, - const ValuesViewType& values, - const IndexerType& indexer ) -: rowIdx( rowIdx ), values( values ), indexer( indexer ) + const DiagonalsShiftsView& diagonalsShifts, + const ValuesViewType& values, + const IndexerType& indexer ) +: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer ) { } -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: getSize() const -> IndexType { return indexer.getRowSize(); } -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: getColumnIndex( const IndexType localIdx ) const -> const IndexType { TNL_ASSERT_GE( localIdx, 0, "" ); - TNL_ASSERT_LT( localIdx, 3, "" ); - return rowIdx + localIdx - 1; + TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" ); + return rowIdx + diagonalsShifts[ localIdx ]; } -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ auto -MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: getValue( const IndexType localIdx ) const -> const RealType& { return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; } -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ auto 
-MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: getValue( const IndexType localIdx ) -> RealType& { return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; } -template< typename ValuesView, typename Indexer > +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > __cuda_callable__ void -MultidiagonalMatrixRowView< ValuesView, Indexer >:: +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: setElement( const IndexType localIdx, const RealType& value ) { diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index addeb18b3..3d33ac0ae 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -38,7 +38,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > using ValuesViewType = typename BaseType::ValuesView; using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; - using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType >; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; @@ -55,6 +55,7 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > MultidiagonalMatrixView( const ValuesViewType& values, const DiagonalsShiftsView& diagonalsShifts, + const HostDiagonalsShiftsView& hostDiagonalsShifts, const IndexerType& indexer ); ViewType getView(); @@ -92,11 +93,11 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > void setValue( const RealType& v ); - bool setElement( const IndexType row, + 
void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 3d9b0237f..1ba8dc34d 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -33,9 +33,11 @@ template< typename Real, MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: MultidiagonalMatrixView( const ValuesViewType& values, const DiagonalsShiftsView& diagonalsShifts, + const HostDiagonalsShiftsView& hostDiagonalsShifts, const IndexerType& indexer ) : MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), diagonalsShifts( diagonalsShifts ), + hostDiagonalsShifts( hostDiagonalsShifts ), indexer( indexer ) { } @@ -48,7 +50,10 @@ auto MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getView() -> ViewType { - return ViewType( this->values.getView(), indexer ); + return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(), + const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(), + const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(), + indexer ); } template< typename Real, @@ -59,7 +64,10 @@ auto MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getConstView() const -> ConstViewType { - return ConstViewType( this->values.getConstView(), indexer ); + return ConstViewType( this->values.getConstView(), + this->diagonalsShifts.getConstView(), + this->hostDiagonalsShifts.getConstView(), + indexer ); } template< typename Real, @@ -208,7 +216,11 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: setValue( const RealType& v ) { - this->values = v; + const RealType newValue = v; + auto f = [=] 
__cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value ) mutable { + value = newValue; + }; + this->forAllRows( f ); } template< typename Real, @@ -220,7 +232,7 @@ auto MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getRow( const IndexType& rowIdx ) const -> const RowView { - return RowView( rowIdx, this->values.getView(), this->indexer ); + return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); } template< typename Real, @@ -232,14 +244,14 @@ auto MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getRow( const IndexType& rowIdx ) -> RowView { - return RowView( rowIdx, this->values.getView(), this->indexer ); + return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); } template< typename Real, typename Device, typename Index, bool RowMajorOrder > -bool +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { @@ -247,21 +259,26 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) TNL_ASSERT_LT( row, this->getRows(), "" ); TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) + + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + { + this->values.setElement( this->getElementIndex( row, i ), value ); + return; + } + if( value != 0.0 ) { std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; throw std::logic_error( msg.str() ); } - this->values.setElement( this->getElementIndex( row, column ), value ); - return true; } template< typename Real, typename Device, typename Index, 
bool RowMajorOrder > -bool +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: addElement( const IndexType row, const IndexType column, @@ -272,15 +289,20 @@ addElement( const IndexType row, TNL_ASSERT_LT( row, this->getRows(), "" ); TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( row - column ) > 1 ) + + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + { + const Index idx = this->getElementIndex( row, i ); + this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value ); + return; + } + if( value != 0.0 ) { std::stringstream msg; - msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; throw std::logic_error( msg.str() ); } - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; } template< typename Real, @@ -296,9 +318,10 @@ getElement( const IndexType row, const IndexType column ) const TNL_ASSERT_GE( column, 0, "" ); TNL_ASSERT_LT( column, this->getColumns(), "" ); - if( abs( column - row ) > 1 ) - return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + return this->values.getElement( this->getElementIndex( row, i ) ); + return 0.0; } template< typename Real, @@ -326,35 +349,20 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke { using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); const auto values_view = this->values.getConstView(); + const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = 
this->diagonalsShifts.getSize(); + const IndexType columns = this->getColumns(); const auto indexer = this->indexer; const auto zero = zero_; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { Real_ sum( zero ); - if( rowIdx == 0 ) - { - reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); - reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); - keep( 0, sum ); - return; - } - if( rowIdx + 1 < indexer.getColumns() ) - { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); - keep( rowIdx, sum ); - return; - } - if( rowIdx < indexer.getColumns() ) + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { - reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); - reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); - keep( rowIdx, sum ); - } - else - { - keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) ); } + keep( rowIdx, sum ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -368,7 +376,7 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero ); + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); } template< typename Real, @@ -381,26 +389,17 @@ 
MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); + const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const IndexType columns = this->getColumns(); const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - if( rowIdx == 0 ) + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { - function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); - function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); - } - else if( rowIdx + 1 < indexer.getColumns() ) - { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx, 0 ) ] ); } - else if( rowIdx < indexer.getColumns() ) - { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); - } - else - function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -415,26 +414,17 @@ MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); + const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const IndexType 
columns = this->getColumns(); const auto indexer = this->indexer; auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - if( rowIdx == 0 ) - { - function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); - function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); - } - else if( rowIdx + 1 < indexer.getColumns() ) - { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); - } - else if( rowIdx < indexer.getColumns() ) + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ); } - else - function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } @@ -460,7 +450,7 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) { - this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); + this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); } template< typename Real, @@ -517,7 +507,7 @@ addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." 
); - if( RowMajorOrder == RowMajorOrder_ ) + /*if( RowMajorOrder == RowMajorOrder_ ) { if( thisMatrixMultiplicator == 1.0 ) this->values += matrixMultiplicator * matrix.getValues(); @@ -544,7 +534,7 @@ addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ this->forAllRows( add1 ); else this->forAllRows( addGen ); - } + }*/ } #ifdef HAVE_CUDA @@ -673,12 +663,19 @@ template< typename Real, bool RowMajorOrder > void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) + for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ ) { - str <<"Row: " << row << " -> "; - for( IndexType column = row - 1; column < row + 2; column++ ) - if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str <<"Row: " << rowIdx << " -> "; + for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ ) + { + const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ]; + if( columnIdx >= 0 && columnIdx < this->columns ) + { + auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) ); + if( v ) + str << " Col:" << columnIdx << "->" << v << "\t"; + } + } str << std::endl; } } @@ -713,15 +710,8 @@ template< typename Real, bool RowMajorOrder > __cuda_callable__ Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -getElementIndex( const IndexType row, const IndexType column ) const +getElementIndex( const IndexType row, const IndexType localIdx ) const { - IndexType localIdx = column - row; - if( row > 0 ) - localIdx++; - - TNL_ASSERT_GE( localIdx, 0, "" ); - TNL_ASSERT_LT( localIdx, 3, "" ); - return this->indexer.getGlobalIndex( row, localIdx ); } diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index e7e3ab6b2..82549e744 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ 
b/src/TNL/Matrices/Tridiagonal.h @@ -97,11 +97,11 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void setValue( const RealType& v ); - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 6c09238ff..41d722c6a 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -285,11 +285,11 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -bool +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { - return this->view.setElement( row, column, value ); + this->view.setElement( row, column, value ); } template< typename Real, @@ -297,14 +297,14 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -bool +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ) { - return this->view.addElement( row, column, value, thisElementMultiplicator ); + this->view.addElement( row, column, value, thisElementMultiplicator ); } template< typename Real, @@ -645,14 +645,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - for( IndexType column = row - 1; column < row + 2; column++ ) - if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; - str << std::endl; - } + 
this->view.print( str ); } template< typename Real, diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 128b48494..7db517cbd 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -81,11 +81,11 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > void setValue( const RealType& v ); - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 4d4950c4e..e851d2a1f 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -213,7 +213,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -bool +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { @@ -235,7 +235,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -bool +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: addElement( const IndexType row, const IndexType column, @@ -638,7 +638,11 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os str <<"Row: " << row << " -> "; for( IndexType column = row - 1; column < row + 2; column++ ) if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + { + auto v = this->getElement( row, column ); + if( v ) + str << " Col:" << column << "->" << v << "\t"; + } str << std::endl; } } diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h 
b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h index 0f0436d74..3597c30f7 100644 --- a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h @@ -77,6 +77,9 @@ class MultidiagonalMatrixIndexer __cuda_callable__ const IndexType& getColumns() const { return this->columns; }; + __cuda_callable__ + const IndexType& getDiagonals() const { return this->diagonals; }; + __cuda_callable__ const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; }; diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index cb9916e4c..514ea39e0 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -137,7 +137,6 @@ void test_GetNonemptyRowsCount() Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); m3.setValue( 1.0 ); EXPECT_EQ( m3.getNonemptyRowsCount(), 7 ); - } template< typename Matrix > @@ -148,87 +147,53 @@ void test_GetCompressedRowLengths() using IndexType = typename Matrix::IndexType; using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - const IndexType rows = 10; - const IndexType cols = 11; - - Matrix m( rows, cols ); - - // Insert values into the rows. 
- RealType value = 1; - - for( IndexType i = 0; i < 2; i++ ) // 0th row -> 2 elements - m.setElement( 0, i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) // 1st row -> 3 elements - m.setElement( 1, i, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row -> 2 elements - m.setElement( 2, i, value++ ); - - for( IndexType i = 2; i < 5; i++ ) // 3rd row -> 3 elements - m.setElement( 3, i, value++ ); - - for( IndexType i = 3; i < 6; i++ ) // 4th row -> 3 elements - m.setElement( 4, i, value++ ); - - for( IndexType i = 4; i < 6; i++ ) // 5th row -> 2 elements - m.setElement( 5, i, value++ ); - - for( IndexType i = 5; i < 8; i++ ) // 6th row -> 3 elements - m.setElement( 6, i, value++ ); - - for( IndexType i = 6; i < 8; i++ ) // 7th row -> 2 elements - m.setElement( 7, i, value++ ); - - for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements - m.setElement( 8, i, value++ ); + /* + * Sets up the following 8x8 matrix: + * + * / 0 0 0 1 0 1 0 0 \ -> 2 + * | 0 1 0 0 1 0 1 0 | -> 3 + * | 1 0 1 0 0 1 0 1 | -> 4 + * | 0 1 0 1 0 0 1 0 | -> 3 + * | 0 0 1 0 1 0 0 1 | -> 3 + * | 0 0 0 1 0 1 0 0 | -> 2 + * | 0 0 0 0 1 0 1 0 | -> 2 + * \ 0 0 0 0 0 1 0 0 / -> 1 + */ + + const IndexType rows = 8; + const IndexType cols = 8; - for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements - m.setElement( 9, i, value++ ); + Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m.setValue( 1.0 ); + m.setElement( 0, 0, 0.0 ); + m.setElement( 7, 7, 0.0 ); typename Matrix::CompressedRowLengthsVector rowLengths( rows ); rowLengths = 0; m.getCompressedRowLengths( rowLengths ); - typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 }; + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 4, 3, 3, 2, 2, 1 }; EXPECT_EQ( rowLengths, correctRowLengths ); } -template< typename Matrix > -void test_GetRowLength() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using 
IndexType = typename Matrix::IndexType; - using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - - const IndexType rows = 8; - const IndexType cols = 7; - - Matrix m( rows, cols ); - - EXPECT_EQ( m.getRowLength( 0 ), 2 ); - EXPECT_EQ( m.getRowLength( 1 ), 3 ); - EXPECT_EQ( m.getRowLength( 2 ), 3 ); - EXPECT_EQ( m.getRowLength( 3 ), 3 ); - EXPECT_EQ( m.getRowLength( 4 ), 3 ); - EXPECT_EQ( m.getRowLength( 5 ), 3 ); - EXPECT_EQ( m.getRowLength( 6 ), 2 ); - EXPECT_EQ( m.getRowLength( 7 ), 1 ); -} - template< typename Matrix > void test_GetAllocatedElementsCount() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; const IndexType rows = 7; const IndexType cols = 6; - Matrix m( rows, cols ); + Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m1.getAllocatedElementsCount(), 28 ); + + Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m2.getAllocatedElementsCount(), 32 ); - EXPECT_EQ( m.getAllocatedElementsCount(), 21 ); + Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m3.getAllocatedElementsCount(), 32 ); } template< typename Matrix > @@ -237,29 +202,27 @@ void test_GetNumberOfNonzeroMatrixElements() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 7x6 matrix: * - * / 0 1 0 0 0 0 \ - * | 2 3 4 0 0 0 | - * | 0 5 6 7 0 0 | - * | 0 0 8 9 10 0 | - * | 0 0 0 11 12 13 | - * | 0 0 0 0 14 0 | - * \ 0 0 0 0 0 16 / + * / 0 0 1 0 1 0 \ -> 2 + * | 0 1 0 1 0 1 | -> 3 + * | 0 0 1 0 1 0 | -> 2 + * | 1 0 0 1 0 1 | -> 3 + * | 0 1 0 0 1 0 | -> 2 + * | 0 0 1 0 0 1 | -> 2 + * \ 0 0 0 1 0 0 / -> 1 + * ---- + * 15 */ const IndexType rows = 7; const IndexType cols = 6; - Matrix 
m( rows, cols ); - - RealType value = 0; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) - m.setElement( i, j, value++ ); - - m.setElement( 5, 5, 0); + Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + m.setValue( 1.0 ); + m.setElement( 0, 0, 0.0 ); EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); } @@ -270,6 +233,7 @@ void test_Reset() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 5x4 matrix: @@ -283,8 +247,7 @@ void test_Reset() const IndexType rows = 5; const IndexType cols = 4; - Matrix m( rows, cols ); - + Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) ); m.reset(); EXPECT_EQ( m.getRows(), 0 ); @@ -297,130 +260,73 @@ void test_SetValue() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 7x6 matrix: * - * / 0 1 0 0 0 0 \ - * | 2 3 4 0 0 0 | - * | 0 5 6 7 0 0 | - * | 0 0 8 9 10 0 | - * | 0 0 0 11 12 13 | - * | 0 0 0 0 14 0 | - * \ 0 0 0 0 0 16 / + * / 1 0 1 0 1 0 \ + * | 0 1 0 1 0 1 | + * | 0 0 1 0 1 0 | + * | 1 0 0 1 0 1 | + * | 0 1 0 0 1 0 | + * | 0 0 1 0 0 1 | + * \ 0 0 0 1 0 0 / */ const IndexType rows = 7; const IndexType cols = 6; - Matrix m( rows, cols ); - - RealType value = 0; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) - m.setElement( i, j, value++ ); - - m.setElement( 5, 5, 0); - - EXPECT_EQ( m.getElement( 0, 0 ), 0 ); - EXPECT_EQ( m.getElement( 0, 1 ), 1 ); - EXPECT_EQ( m.getElement( 0, 2 ), 0 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 
); - - EXPECT_EQ( m.getElement( 1, 0 ), 2 ); - EXPECT_EQ( m.getElement( 1, 1 ), 3 ); - EXPECT_EQ( m.getElement( 1, 2 ), 4 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 5 ); - EXPECT_EQ( m.getElement( 2, 2 ), 6 ); - EXPECT_EQ( m.getElement( 2, 3 ), 7 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 2, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 8 ); - EXPECT_EQ( m.getElement( 3, 3 ), 9 ); - EXPECT_EQ( m.getElement( 3, 4 ), 10 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 11 ); - EXPECT_EQ( m.getElement( 4, 4 ), 12 ); - EXPECT_EQ( m.getElement( 4, 5 ), 13 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 14 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 0 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 16 ); - - // Set the values of all elements to a certain number - m.setValue( 42 ); - - EXPECT_EQ( m.getElement( 0, 0 ), 42 ); - EXPECT_EQ( m.getElement( 0, 1 ), 42 ); - EXPECT_EQ( m.getElement( 0, 2 ), 0 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 42 ); - EXPECT_EQ( m.getElement( 1, 1 ), 42 ); - EXPECT_EQ( m.getElement( 1, 2 ), 42 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - 
EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 42 ); - EXPECT_EQ( m.getElement( 2, 2 ), 42 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 2, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 42 ); - EXPECT_EQ( m.getElement( 3, 3 ), 42 ); - EXPECT_EQ( m.getElement( 3, 4 ), 42 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 42 ); - EXPECT_EQ( m.getElement( 4, 4 ), 42 ); - EXPECT_EQ( m.getElement( 4, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 42 ); - EXPECT_EQ( m.getElement( 5, 5 ), 42 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 0 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 42 ); + Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + m.setValue( 1.0 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + 
EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 1 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 1 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); } template< typename Matrix > @@ -429,61 +335,70 @@ void test_SetElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 5x5 matrix: * - * / 1 2 0 0 0 \ - * | 6 7 8 0 0 | - * | 0 12 13 14 0 | - * | 0 0 18 19 20 | - * \ 0 0 0 24 25 / + * / 1 2 0 0 5 \ + * | 0 7 8 0 0 | + * | 0 0 13 14 0 | + * | 16 0 0 19 20 | + * \ 0 22 0 0 25 / */ const IndexType rows = 5; const IndexType cols = 5; - - Matrix m( rows, cols ); + DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + Matrix m( rows, cols, diagonals ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) { - if( abs( i - j ) > 1 ) + bool found( false ); + for( IndexType k = 0; k < diagonals.getSize(); k++ ) + { + if( i + diagonals[ k ] == j ) + { + m.setElement( i, j, 
value++ ); + found = true; + break; + } + } + if( ! found ) { EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); } - else - m.setElement( i, j, value++ ); } EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 0 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); } @@ -493,123 +408,137 @@ void test_AddElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* - * Sets up the following 6x5 matrix: + * Sets up the following 5x5 matrix: * - * / 1 2 0 0 0 \ - * | 6 7 8 0 0 | - * | 0 12 13 14 0 | - * | 0 0 18 19 20 | - * | 0 0 0 24 25 | - * \ 0 0 0 0 30 / + * / 1 2 0 0 5 \ + * | 0 7 8 0 0 | + * | 0 0 13 14 0 | + * | 0 0 0 19 20 | + * \ 0 0 0 0 25 / */ - - const IndexType 
rows = 6; + const IndexType rows = 5; const IndexType cols = 5; + DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + Matrix m( rows, cols, diagonals ); - Matrix m( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - { - if( abs( i - j ) <= 1 ) - m.setElement( i, j, value ); - value++; - } + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + bool found( false ); + for( IndexType k = 0; k < diagonals.getSize(); k++ ) + { + if( i + diagonals[ k ] == j ) + { + if( j >= i ) + m.setElement( i, j, value++ ); + else value++; + found = true; + break; + } + } + if( ! found ) + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + } // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 0 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 0 ); EXPECT_EQ( m.getElement( 3, 0 ), 0 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 0 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - EXPECT_EQ( m.getElement( 5, 0 
), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - // Add new elements to the old elements with a multiplying factor applied to the old elements. /* * The following setup results in the following 6x5 matrix: * - * / 1 2 0 0 0 \ / 1 2 0 0 0 \ / 3 6 0 0 0 \ - * | 6 7 8 0 0 | | 3 4 5 0 0 | | 15 18 21 0 0 | - * 2 * | 0 12 13 14 0 | + | 0 6 7 8 0 | = | 0 30 33 36 0 | - * | 0 0 18 19 20 | | 0 0 9 10 11 | | 0 0 45 48 51 | - * | 0 0 0 24 25 | | 0 0 0 12 13 | | 0 0 0 60 63 | - * \ 0 0 0 0 30 / \ 0 0 0 0 14 / \ 0 0 0 0 74 / + * / 1 2 0 0 5 \ / 1 2 0 0 5 \ / 3 6 0 0 15 \ + * | 0 7 8 0 0 | | 0 7 8 0 0 | | 0 21 24 0 0 | + * 2 * | 0 0 13 14 0 | + | 0 0 13 14 0 | = | 0 0 39 42 0 | + * | 0 0 0 19 20 | | 16 0 0 19 20 | | 16 0 0 57 60 | + * \ 0 0 0 0 25 / \ 0 22 0 0 25 / \ 0 22 0 0 75 / + * */ - RealType newValue = 1; + value = 1; RealType multiplicator = 2; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - if( abs( i - j ) <= 1 ) - m.addElement( i, j, newValue++, multiplicator ); + { + bool found( false ); + for( IndexType k = 0; k < diagonals.getSize(); k++ ) + { + if( i + diagonals[ k ] == j ) + { + m.addElement( i, j, value++, multiplicator ); + found = true; + break; + } + } + if( ! 
found ) + { + EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error ); + } + } EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - EXPECT_EQ( m.getElement( 1, 0 ), 15 ); - EXPECT_EQ( m.getElement( 1, 1 ), 18 ); - EXPECT_EQ( m.getElement( 1, 2 ), 21 ); + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); EXPECT_EQ( m.getElement( 1, 3 ), 0 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 30 ); - EXPECT_EQ( m.getElement( 2, 2 ), 33 ); - EXPECT_EQ( m.getElement( 2, 3 ), 36 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 45 ); - EXPECT_EQ( m.getElement( 3, 3 ), 48 ); - EXPECT_EQ( m.getElement( 3, 4 ), 51 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 60 ); - EXPECT_EQ( m.getElement( 4, 4 ), 63 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 74 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); } template< typename Matrix > @@ -618,58 +547,75 @@ void test_SetRow() using RealType = typename Matrix::RealType; using DeviceType = 
typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* - * Sets up the following 3x7 matrix: + * Sets up the following 5x7 matrix: * - * / 1 2 0 0 0 0 0 \ - * | 8 9 10 0 0 0 0 | - * \ 0 16 17 18 0 0 0 / + * / 1 0 2 0 3 0 0 \ + * | 4 5 0 6 0 7 0 | + * | 0 8 9 0 10 0 11 | + * | 0 0 12 13 0 14 0 | + * \ 0 0 0 15 16 0 17 / */ - const IndexType rows = 3; + const IndexType rows = 5; const IndexType cols = 7; - Matrix m( rows, cols ); + Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) ); auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 3 ][ 3 ] { - { 1, 2, 0 }, - { 8, 9, 10 }, - { 16, 17, 18 } }; + RealType values[ 5 ][ 4 ] { + { 0, 1, 2, 3 }, + { 4, 5, 6, 7 }, + { 8, 9, 10, 11 }, + { 12, 13, 14, 0 }, + { 15, 16, 17, 0 } }; auto row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 3; i++ ) - { - if( rowIdx == 0 && i > 1 ) - break; + for( IndexType i = 0; i < 4; i++ ) row.setElement( i, values[ rowIdx ][ i ] ); - } }; - TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType) 0, rows, f ); EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); EXPECT_EQ( m.getElement( 0, 5 ), 0 ); EXPECT_EQ( m.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 0 ), 8 ); - EXPECT_EQ( m.getElement( 1, 1 ), 9 ); - EXPECT_EQ( m.getElement( 1, 2 ), 10 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 0 ), 4 ); + EXPECT_EQ( m.getElement( 1, 1 ), 5 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - 
EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 7 ); EXPECT_EQ( m.getElement( 1, 6 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 16 ); - EXPECT_EQ( m.getElement( 2, 2 ), 17 ); - EXPECT_EQ( m.getElement( 2, 3 ), 18 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 10 ); EXPECT_EQ( m.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 11 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 14 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 4 ), 16 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 17 ); } template< typename Matrix > @@ -678,27 +624,31 @@ void test_AddRow() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + /* * Sets up the following 6x5 matrix: * - * / 1 2 0 0 0 \ - * | 6 7 8 0 0 | - * | 0 12 13 14 0 | - * | 0 0 18 19 20 | - * | 0 0 0 24 25 | - * \ 0 0 0 0 30 / + * / 1 2 3 0 0 \ + * | 0 7 8 9 0 | + * | 0 0 13 14 15 | + * | 0 0 0 19 20 | + * | 0 0 0 0 25 | + * \ 0 0 0 0 0 / */ const IndexType rows = 6; const IndexType cols = 5; + DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } ); - Matrix m( rows, cols ); + Matrix m( rows, cols, diagonals ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) { - if( abs( i 
- j ) <= 1 ) + IndexType offset = j - i; + if( diagonals.containsValue( offset ) && offset >= 0) m.setElement( i, j, value ); value++; } @@ -706,63 +656,63 @@ void test_AddRow() // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); EXPECT_EQ( m.getElement( 3, 0 ), 0 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 0 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); // Add new elements to the old elements with a multiplying factor applied to the old elements. 
/* * The following setup results in the following 6x5 sparse matrix: * - * / 0 0 0 0 0 0 \ / 1 2 0 0 0 \ / 11 11 0 0 0 \ / 11 11 0 0 0 \ - * | 0 1 0 0 0 0 | | 6 7 8 0 0 | | 22 22 22 0 0 | | 28 29 30 0 0 | - * | 0 0 2 0 0 0 | * | 0 12 13 14 0 | + | 0 33 33 33 0 | = | 0 57 59 61 0 | - * | 0 0 0 3 0 0 | | 0 0 18 19 20 | | 0 0 44 44 44 | | 0 0 98 101 104 | - * | 0 0 0 0 4 0 | | 0 0 0 24 25 | | 0 0 0 55 55 | | 0 0 0 151 155 | - * \ 0 0 0 0 0 5 / \ 0 0 0 0 30 / \ 0 0 0 0 66 / \ 0 0 0 0 216 / + * / 0 0 0 0 0 0 \ / 1 2 3 0 0 \ / 11 0 0 0 0 \ / 11 0 0 0 0 \ + * | 0 1 0 0 0 0 | | 0 7 8 9 0 | | 0 22 0 0 0 | | 0 29 8 9 0 | + * | 0 0 2 0 0 0 | * | 0 0 13 14 15 | + | 33 0 33 0 0 | = | 33 0 59 28 30 | + * | 0 0 0 3 0 0 | | 0 0 0 19 20 | | 0 44 0 44 0 | | 0 44 0 101 60 | + * | 0 0 0 0 4 0 | | 0 0 0 0 25 | | 0 0 55 0 55 | | 0 0 55 0 155 | + * \ 0 0 0 0 0 5 / \ 0 0 0 0 0 / \ 0 0 0 66 0 / \ 0 0 0 66 0 / */ auto matrix_view = m.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - RealType values[ 6 ][ 3 ] { - { 11, 11, 0 }, - { 22, 22, 22 }, - { 33, 33, 33 }, - { 44, 44, 44 }, - { 55, 55, 55 }, - { 66, 66, 66 } }; + RealType values[ 6 ][ 4 ] { + { 0, 11, 0, 0 }, + { 0, 22, 0, 0 }, + { 33, 33, 0, 0 }, + { 44, 44, 0, 0 }, + { 55, 55, 0, 0 }, + { 66, 0, 0, 0 } }; auto row = matrix_view.getRow( rowIdx ); - for( IndexType i = 0; i < 3; i++ ) + for( IndexType i = 0; i < 4; i++ ) { RealType& val = row.getValue( i ); val = rowIdx * val + values[ rowIdx ][ i ]; @@ -770,42 +720,41 @@ void test_AddRow() }; TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 29 ); - EXPECT_EQ( m.getElement( 1, 2 ), 30 ); - EXPECT_EQ( 
m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 59 ); - EXPECT_EQ( m.getElement( 2, 3 ), 61 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 3 ), 28 ); + EXPECT_EQ( m.getElement( 2, 4 ), 30 ); EXPECT_EQ( m.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 1 ), 44 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 101 ); - EXPECT_EQ( m.getElement( 3, 4 ), 104 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 2 ), 55 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 155 ); EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 216 ); + EXPECT_EQ( m.getElement( 5, 3 ), 66 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); } template< typename Matrix > @@ -814,6 +763,7 @@ void test_VectorProduct() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 5x4 matrix: @@ -826,8 +776,9 @@ void test_VectorProduct() */ const IndexType rows = 5; const IndexType cols = 4; + DiagonalsShiftsType diagonals{ -1, 0, 1 }; - Matrix m( rows, cols ); + Matrix m( rows, cols, diagonals ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) @@ -861,6 +812,8 @@ 
void test_AddMatrix() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; + using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType; + using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType; /* * Sets up the following 5x4 matrix: @@ -873,8 +826,10 @@ void test_AddMatrix() */ const IndexType rows = 5; const IndexType cols = 4; + DiagonalsShiftsType1 diagonals1; + DiagonalsShiftsType2 diagonals2; - Matrix1 m( rows, cols ); + Matrix1 m( rows, cols, diagonals1 ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) @@ -894,7 +849,7 @@ void test_AddMatrix() * | 0 0 9 10 | * \ 0 0 0 11 / */ - Matrix2 m2( rows, cols ); + Matrix2 m2( rows, cols, diagonals2 ); RealType newValue = 1; for( IndexType i = 0; i < rows; i++ ) @@ -1457,7 +1412,7 @@ TYPED_TEST( MatrixTest, getNonemptyRowsCountTest ) test_GetNonemptyRowsCount< MatrixType >(); } -/* + TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1465,13 +1420,6 @@ TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) test_GetCompressedRowLengths< MatrixType >(); } -TYPED_TEST( MatrixTest, getRowLengthTest ) -{ - using MatrixType = typename TestFixture::MatrixType; - - test_GetRowLength< MatrixType >(); -} - TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -1535,7 +1483,7 @@ TYPED_TEST( MatrixTest, vectorProductTest ) test_VectorProduct< MatrixType >(); } -TYPED_TEST( MatrixTest, addMatrixTest ) +/*TYPED_TEST( MatrixTest, addMatrixTest ) { using MatrixType = typename TestFixture::MatrixType; -- GitLab From 6747a5465e225f397f3935757f5d3a689be81a60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 14 Jan 2020 21:37:50 +0100 Subject: [PATCH 092/179] Fixed multidiagonal matrix. 
--- src/TNL/Matrices/Multidiagonal.h | 9 +- src/TNL/Matrices/Multidiagonal.hpp | 60 +++++++--- .../Matrices/MultidiagonalMatrixTest.h | 106 +++++++----------- 3 files changed, 94 insertions(+), 81 deletions(-) diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 1741c0c75..9e5f92295 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -179,8 +179,13 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > Multidiagonal& operator=( const Multidiagonal& matrix ); // cross-device copy assignment - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ); + template< typename Real_, + typename Device_, + typename Index_, + bool RowMajorOrder_, + typename RealAllocator_, + typename IndexAllocator_ > + Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix ); void save( File& file ) const; diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 53e3c7f2f..b88511501 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -648,15 +648,17 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_, typename IndexAllocator_ > Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: -operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) +operator=( const Multidiagonal< Real_, 
Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, - "unknown device" ); + using RHSMatrix = Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; this->setLike( matrix ); if( RowMajorOrder == RowMajorOrder_ ) @@ -673,13 +675,45 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo } else { - Multidiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; - auxMatrix = matrix; - const auto matrix_view = auxMatrix.getView(); - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { - value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; - }; - this->forAllRows( f ); + const IndexType maxRowLength = this->diagonalsShifts.getSize(); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = 
matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + value = thisValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } } } } diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index 514ea39e0..21a836d2d 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -768,15 +768,15 @@ void test_VectorProduct() /* * Sets up the following 5x4 matrix: * - * / 1 2 0 0 \ - * | 5 6 7 0 | - * | 0 10 11 12 | - * | 0 0 15 16 | - * \ 0 0 0 20 / + * / 1 0 3 0 \ + * | 0 6 0 8 | + * | 9 0 11 0 | + * | 0 14 0 16 | + * \ 0 0 19 0 / */ const IndexType rows = 5; const IndexType cols = 4; - DiagonalsShiftsType diagonals{ -1, 0, 1 }; + DiagonalsShiftsType diagonals{ -2, 0, 2 }; Matrix m( rows, cols, diagonals ); @@ -784,7 +784,7 @@ void 
test_VectorProduct() for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) { - if( abs( i - j ) <= 1 ) + if( diagonals.containsValue( j - i ) ) m.setElement( i, j, value ); value++; } @@ -799,11 +799,11 @@ void test_VectorProduct() m.vectorProduct( inVector, outVector); - EXPECT_EQ( outVector.getElement( 0 ), 6 ); - EXPECT_EQ( outVector.getElement( 1 ), 36 ); - EXPECT_EQ( outVector.getElement( 2 ), 66 ); - EXPECT_EQ( outVector.getElement( 3 ), 62 ); - EXPECT_EQ( outVector.getElement( 4 ), 40 ); + EXPECT_EQ( outVector.getElement( 0 ), 8 ); + EXPECT_EQ( outVector.getElement( 1 ), 28 ); + EXPECT_EQ( outVector.getElement( 2 ), 40 ); + EXPECT_EQ( outVector.getElement( 3 ), 60 ); + EXPECT_EQ( outVector.getElement( 4 ), 38 ); } template< typename Matrix1, typename Matrix2 = Matrix1 > @@ -935,6 +935,7 @@ void test_GetMatrixProduct() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 5x4 matrix: * @@ -946,10 +947,9 @@ void test_GetMatrixProduct() */ const IndexType leftRows = 5; const IndexType leftCols = 4; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); - Matrix leftMatrix; - leftMatrix.reset(); - leftMatrix.setDimensions( leftRows, leftCols ); + Matrix leftMatrix( leftRows, leftCols, diagonalsShifts ); RealType value = 1; for( IndexType i = 0; i < leftRows; i++ ) @@ -986,9 +986,7 @@ void test_GetMatrixProduct() * \ 0 0 0 0 / */ - Matrix mResult; - mResult.reset(); - mResult.setDimensions( leftRows, rightCols ); + Matrix mResult( leftRows, rightCols, diagonalsShifts ); mResult.setValue( 0 ); RealType leftMatrixMultiplicator = 1; @@ -1040,6 +1038,7 @@ void test_GetTransposition() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename 
Matrix::DiagonalsShiftsType; /* * Sets up the following 3x2 matrix: * @@ -1049,10 +1048,9 @@ void test_GetTransposition() */ const IndexType rows = 3; const IndexType cols = 2; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols, diagonalsShifts ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) @@ -1067,9 +1065,7 @@ void test_GetTransposition() * / 0 0 0 \ * \ 0 0 0 / */ - Matrix mTransposed; - mTransposed.reset(); - mTransposed.setDimensions( cols, rows ); + Matrix mTransposed( cols, rows, diagonalsShifts ); mTransposed.print( std::cout ); @@ -1102,6 +1098,7 @@ void test_PerformSORIteration() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 4x4 matrix: * @@ -1112,10 +1109,9 @@ void test_PerformSORIteration() */ const IndexType rows = 4; const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + Matrix m( rows, cols, diagonalsShifts ); m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0 ); @@ -1178,33 +1174,35 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); - MultidiagonalHost hostMatrix( rows, columns ); + DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } ); + MultidiagonalHost 
hostMatrix( rows, columns, diagonalsShifts ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) - if( abs( i - j ) <= 1 ) + if( diagonalsShifts.containsValue( j - i ) ) hostMatrix.setElement( i, j, i + j ); - Matrix matrix( rows, columns ); + Matrix matrix( rows, columns, diagonalsShifts ); matrix.getValues() = 0.0; matrix = hostMatrix; for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j < rows; j++ ) - if( abs( i - j ) <= 1 ) + if( diagonalsShifts.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); #ifdef HAVE_CUDA - MultidiagonalCuda cudaMatrix( rows, columns ); + MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) - if( abs( i - j ) <= 1 ) + if( diagonalsShifts.containsValue( j - i ) ) cudaMatrix.setElement( i, j, i + j ); matrix.getValues() = 0.0; @@ -1212,7 +1210,7 @@ void test_AssignmentOperator() for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) { - if( abs( i - j ) <= 1 ) + if( diagonalsShifts.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); @@ -1227,6 +1225,7 @@ void test_SaveAndLoad() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 4x4 matrix: @@ -1238,14 +1237,15 @@ void test_SaveAndLoad() */ const IndexType rows = 4; const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); - Matrix savedMatrix( rows, cols ); + Matrix savedMatrix( rows, cols, diagonalsShifts ); RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) { - if( abs( i - j ) <= 1 ) + if( diagonalsShifts.containsValue( j - i ) ) 
savedMatrix.setElement( i, j, value ); value++; } @@ -1303,6 +1303,7 @@ void test_Print() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; /* * Sets up the following 5x4 sparse matrix: @@ -1315,8 +1316,9 @@ void test_Print() */ const IndexType rows = 5; const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); - Matrix m( rows, cols ); + Matrix m( rows, cols, diagonalsShifts ); RealType value = 1; for( IndexType i = 0; i < rows; i++) @@ -1488,20 +1490,7 @@ TYPED_TEST( MatrixTest, vectorProductTest ) using MatrixType = typename TestFixture::MatrixType; test_AddMatrix< MatrixType >(); -} - -TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) -{ - using MatrixType = typename TestFixture::MatrixType; - - using RealType = typename MatrixType::RealType; - using DeviceType = typename MatrixType::DeviceType; - using IndexType = typename MatrixType::IndexType; - using RealAllocatorType = typename MatrixType::RealAllocatorType; - using MatrixType2 = TNL::Matrices::Multidiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >; - - test_AddMatrix< MatrixType, MatrixType2 >(); -} +}*/ TYPED_TEST( MatrixTest, assignmentOperatorTest ) { @@ -1523,21 +1512,6 @@ TYPED_TEST( MatrixTest, printTest ) test_Print< MatrixType >(); } -*/ - -//// test_getType is not general enough yet. DO NOT TEST IT YET. 
- -//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Host ) -//{ -// host_test_GetType< Multidiagonal_host_float, Multidiagonal_host_int >(); -//} -// -//#ifdef HAVE_CUDA -//TEST( MultidiagonalMatrixTest, Multidiagonal_GetTypeTest_Cuda ) -//{ -// cuda_test_GetType< Multidiagonal_cuda_float, Multidiagonal_cuda_int >(); -//} -//#endif /*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host ) { -- GitLab From 71a1f300e71ef90e6d81f07e553c11eb1d227b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 14 Jan 2020 22:25:05 +0100 Subject: [PATCH 093/179] Fixing multidiagonal matrix with CUDA. --- src/TNL/Matrices/Matrix.h | 10 ++++++---- src/TNL/Matrices/Multidiagonal.hpp | 2 +- src/TNL/Matrices/MultidiagonalMatrixView.h | 8 ++++---- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index ebe7ccc21..0b34a5a57 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -76,14 +76,16 @@ public: __cuda_callable__ IndexType getColumns() const; - virtual void setElement( const IndexType row, + //virtual TODO: uncomment + void setElement( const IndexType row, const IndexType column, - const RealType& value ) = 0; + const RealType& value );// = 0; - virtual void addElement( const IndexType row, + //virtual TODO: uncomment + void addElement( const IndexType row, const IndexType column, const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ) = 0; + const RealType& thisElementMultiplicator = 1.0 );// = 0; virtual Real getElement( const IndexType row, const IndexType column ) const = 0; diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index b88511501..7bc83f2d4 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -707,7 +707,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, 
RealAllo //// // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; value = thisValuesBuffer_view[ bufferIdx ]; }; diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index 3d33ac0ae..1e5a9bd28 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -30,10 +30,10 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > using DeviceType = Device; using IndexType = Index; using BaseType = MatrixView< Real, Device, Index >; - using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >; - using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; - using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; - using HostDiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; + //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >; + using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >; + //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; + using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >; using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; using ValuesViewType = typename BaseType::ValuesView; using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 1ba8dc34d..2839c997a 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ 
b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -398,7 +398,7 @@ forRows( IndexType first, IndexType last, Function& function ) const { const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) - function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx, 0 ) ] ); + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); -- GitLab From 537628295e5e7127f5d202e3f73a61103acaad15 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 15 Jan 2020 14:42:18 +0100 Subject: [PATCH 094/179] Avoiding Matrix::getCompressedRowLengths. --- src/Python/pytnl/tnl/SparseMatrix.h | 2 +- src/TNL/Matrices/Matrix.h | 2 +- src/TNL/Matrices/Matrix.hpp | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index b4cc0fc1a..e4064e1a4 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -51,7 +51,7 @@ void export_Matrix( py::module & m, const char* name ) using VectorType = TNL::Containers::Vector< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >; - void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVector&) const = &Matrix::getCompressedRowLengths; + void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVectorView) const = &Matrix::getCompressedRowLengths; auto matrix = py::class_< Matrix, TNL::Object >( m, name ) .def(py::init<>()) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 0b34a5a57..a5f2b6b8f 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -57,7 +57,7 @@ public: // TODO: implementation is not parallel // TODO: it would be nice if padding zeros could be stripped - void getCompressedRowLengths( 
CompressedRowLengthsVector& rowLengths ) const; + //void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 4ddbacde5..0710ca829 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -56,7 +56,7 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp this->columns = columns; } -template< typename Real, +/*template< typename Real, typename Device, typename Index, typename RealAllocator > @@ -64,7 +64,7 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp { rowLengths.setSize( this->getRows() ); getCompressedRowLengths( rowLengths.getView() ); -} +}*/ template< typename Real, typename Device, @@ -72,9 +72,9 @@ template< typename Real, typename RealAllocator > void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { - //TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); - //for( IndexType row = 0; row < this->getRows(); row++ ) - // rowLengths.setElement( row, this->getRowLength( row ) ); + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); } template< typename Real, -- GitLab From 376b4bffe77bdbc005f2692b66de09f107d47d16 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 15 Jan 2020 14:42:57 +0100 Subject: [PATCH 095/179] Fixing multidiagonal matrix unit tests for CUDA.
--- .../Matrices/MultidiagonalMatrixTest.h | 54 +++++-------------- 1 file changed, 13 insertions(+), 41 deletions(-) diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index 21a836d2d..49bcfa11c 100644 --- a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -159,7 +159,7 @@ void test_GetCompressedRowLengths() * | 0 0 0 0 1 0 1 0 | -> 2 * \ 0 0 0 0 0 1 0 0 / -> 1 */ - + const IndexType rows = 8; const IndexType cols = 8; @@ -354,22 +354,12 @@ void test_SetElement() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - { - bool found( false ); - for( IndexType k = 0; k < diagonals.getSize(); k++ ) - { - if( i + diagonals[ k ] == j ) - { - m.setElement( i, j, value++ ); - found = true; - break; - } - } - if( ! found ) + if( diagonals.containsValue( j - i ) ) + m.setElement( i, j, value++ ); + else { EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); } - } EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); @@ -427,24 +417,16 @@ void test_AddElement() RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - { - bool found( false ); - for( IndexType k = 0; k < diagonals.getSize(); k++ ) + if( diagonals.containsValue( j - i ) ) { - if( i + diagonals[ k ] == j ) - { - if( j >= i ) - m.setElement( i, j, value++ ); - else value++; - found = true; - break; - } + if( j >= i ) + m.setElement( i, j, value ); + value++; } - if( ! 
found ) + else { EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); } - } // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); @@ -486,29 +468,19 @@ void test_AddElement() * 2 * | 0 0 13 14 0 | + | 0 0 13 14 0 | = | 0 0 39 42 0 | * | 0 0 0 19 20 | | 16 0 0 19 20 | | 16 0 0 57 60 | * \ 0 0 0 0 25 / \ 0 22 0 0 25 / \ 0 22 0 0 75 / - * + * */ value = 1; RealType multiplicator = 2; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - { - bool found( false ); - for( IndexType k = 0; k < diagonals.getSize(); k++ ) - { - if( i + diagonals[ k ] == j ) - { - m.addElement( i, j, value++, multiplicator ); - found = true; - break; - } - } - if( ! found ) + if( diagonals.containsValue( j - i ) ) + m.addElement( i, j, value++, multiplicator ); + else { EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error ); } - } EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); -- GitLab From 4310648979b9c849d893287cb4deda1d03c359c0 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 15 Jan 2020 14:45:12 +0100 Subject: [PATCH 096/179] Changing Matrix::[set,add]Element from bool to void with exception throwing.
--- src/TNL/Matrices/Dense.h | 4 ++-- src/TNL/Matrices/Dense.hpp | 20 +++++++++++--------- src/TNL/Matrices/DenseMatrixView.h | 4 ++-- src/TNL/Matrices/DenseMatrixView.hpp | 20 +++++++++++--------- src/TNL/Matrices/Matrix.hpp | 6 +++--- src/TNL/Matrices/SparseMatrixView.h | 4 ++-- src/TNL/Matrices/SparseMatrixView.hpp | 20 ++++++++++++-------- src/TNL/Matrices/Tridiagonal.hpp | 4 ++-- src/TNL/Matrices/TridiagonalMatrixView.hpp | 2 -- 9 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 778fd0bd4..485c94701 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -107,11 +107,11 @@ class Dense : public Matrix< Real, Device, Index > const Real& operator()( const IndexType row, const IndexType column ) const; - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index c4deeb6fa..7e6f37948 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -289,12 +289,13 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) { this->values.setElement( this->getElementIndex( row, column ), value ); - return true; } template< typename Real, @@ -302,10 +303,12 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( const 
IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) { const IndexType elementIndex = this->getElementIndex( row, column ); if( thisElementMultiplicator == 1.0 ) @@ -314,7 +317,6 @@ bool Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addElement( con else this->values.setElement( elementIndex, thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); - return true; } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 23f5d7317..9bad424b2 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -111,11 +111,11 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const Real& operator()( const IndexType row, const IndexType column ) const; - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 48c0ccdc3..21f6d79ef 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -240,22 +240,25 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) { this->values.setElement( 
this->getElementIndex( row, column ), value ); - return true; } template< typename Real, typename Device, typename Index, bool RowMajorOrder > -bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) { const IndexType elementIndex = this->getElementIndex( row, column ); if( thisElementMultiplicator == 1.0 ) @@ -264,7 +267,6 @@ bool DenseMatrixView< Real, Device, Index, RowMajorOrder >::addElement( const In else this->values.setElement( elementIndex, thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); - return true; } template< typename Real, diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 0710ca829..2d5906d23 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -139,12 +139,12 @@ getValues() const { return this->values; } - + template< typename Real, typename Device, typename Index, typename RealAllocator > -typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& +typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& Matrix< Real, Device, Index, RealAllocator >:: getValues() { @@ -237,7 +237,7 @@ template< typename Real, typename Device, typename Index, typename RealAllocator > -void +void Matrix< Real, Device, Index, RealAllocator >:: computeColorsVector(Containers::Vector &colorsVector) { diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 1f587acf3..aba3b4642 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -92,11 +92,11 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ RowView getRow( const IndexType& rowIdx ); - 
bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index d836fe5e9..3b192b4e9 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -54,7 +54,7 @@ auto SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: getView() -> ViewType { - return ViewType( this->getRows(), + return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), this->columnIndexes.getView(), @@ -204,13 +204,13 @@ template< typename Real, typename Index, typename MatrixType, template< typename, typename > class SegmentsView > -bool +void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { - return this->addElement( row, column, value, 0.0 ); + this->addElement( row, column, value, 0.0 ); } template< typename Real, @@ -218,7 +218,7 @@ template< typename Real, typename Index, typename MatrixType, template< typename, typename > class SegmentsView > -bool +void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: addElement( const IndexType row, const IndexType column, @@ -244,18 +244,22 @@ addElement( const IndexType row, if( col == column ) { this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); - return true; + return; } if( col == this->getPaddingIndex() || col > column ) break; } if( i == rowSize ) - return false; + { + std::stringstream msg; + msg << "The capacity of the sparse matrix row number " << row << " was exceeded."; + throw std::logic_error( msg.str() ); + } if( col == this->getPaddingIndex() ) { 
this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); - return true; + return; } else { @@ -273,7 +277,7 @@ addElement( const IndexType row, this->columnIndexes.setElement( globalIdx, column ); this->values.setElement( globalIdx, value ); - return true; + return; } } diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 41d722c6a..8f4f4e190 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -404,7 +404,7 @@ template< typename Real, typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType +typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { @@ -418,7 +418,7 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > -void +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index e851d2a1f..008becb09 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -228,7 +228,6 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) throw std::logic_error( msg.str() ); } this->values.setElement( this->getElementIndex( row, column ), value ); - return true; } template< typename Real, @@ -254,7 +253,6 @@ addElement( const IndexType row, } const Index i = this->getElementIndex( row, column ); this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; } template< typename Real, -- GitLab From cdc03609b8f270645060bbb08db97cac64b10beb Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 15 Jan 2020 14:45:58 +0100 Subject: [PATCH 097/179] Renaming 
nonEmptyRows to nonemptyRows in tridiagonal matrix indexer. --- src/TNL/Matrices/TridiagonalMatrixView.hpp | 4 ++-- .../details/TridiagonalMatrixIndexer.h | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 008becb09..d8fa6061c 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -326,7 +326,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { - this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero ); + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); } template< typename Real, @@ -418,7 +418,7 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: forAllRows( Function& function ) { - this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function ); + this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); } template< typename Real, diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h index 6d3377b4f..64beb44f7 100644 --- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -26,21 +26,21 @@ class TridiagonalMatrixIndexer __cuda_callable__ TridiagonalMatrixIndexer() - : rows( 0 ), columns( 0 ), nonEmptyRows( 0 ){}; + : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){}; __cuda_callable__ TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns ) - : rows( rows ), columns( columns ), nonEmptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {}; + : rows( rows ), columns( columns ), nonemptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {}; __cuda_callable__ TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer ) 
- : rows( indexer.rows ), columns( indexer.columns ), nonEmptyRows( indexer.nonEmptyRows ) {}; + : rows( indexer.rows ), columns( indexer.columns ), nonemptyRows( indexer.nonemptyRows ) {}; void setDimensions( const IndexType& rows, const IndexType& columns ) { this->rows = rows; this->columns = columns; - this->nonEmptyRows = min( rows, columns ) + ( rows > columns ); + this->nonemptyRows = min( rows, columns ) + ( rows > columns ); }; __cuda_callable__ @@ -65,9 +65,9 @@ class TridiagonalMatrixIndexer const IndexType& getColumns() const { return this->columns; }; __cuda_callable__ - const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; }; + const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; }; __cuda_callable__ - IndexType getStorageSize() const { return 3 * this->nonEmptyRows; }; + IndexType getStorageSize() const { return 3 * this->nonemptyRows; }; __cuda_callable__ IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const @@ -76,16 +76,16 @@ class TridiagonalMatrixIndexer TNL_ASSERT_LT( localIdx, 3, "" ); TNL_ASSERT_GE( rowIdx, 0, "" ); TNL_ASSERT_LT( rowIdx, this->rows, "" ); - + if( RowMajorOrder ) return 3 * rowIdx + localIdx; else - return localIdx * nonEmptyRows + rowIdx; + return localIdx * nonemptyRows + rowIdx; }; protected: - IndexType rows, columns, nonEmptyRows; + IndexType rows, columns, nonemptyRows; }; } //namespace details } // namespace Materices -- GitLab From fcb368b75abc9fbce5a0e4cf8e31c18935fc1fdb Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 15 Jan 2020 18:05:53 +0100 Subject: [PATCH 098/179] Forwarding dense matrix methods calls to DenseMatrixView. 
--- src/TNL/Containers/Segments/EllpackView.h | 2 + src/TNL/Containers/Segments/EllpackView.hpp | 13 ++ src/TNL/Matrices/Dense.h | 4 +- src/TNL/Matrices/Dense.hpp | 136 +++++--------------- src/TNL/Matrices/DenseMatrixView.h | 10 -- src/TNL/Matrices/DenseMatrixView.hpp | 77 ++++------- src/UnitTests/Matrices/DenseMatrixTest.h | 4 +- 7 files changed, 71 insertions(+), 175 deletions(-) diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 737810498..f64b04068 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -104,6 +104,8 @@ class EllpackView template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + EllpackView& operator=( const EllpackView& view ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index 21be88654..719a349a9 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -291,6 +291,19 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +EllpackView< Device, Index, RowMajorOrder, Alignment >& +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view ) +{ + this->segmentSize = view.segmentSize; + this->size = view.size; + this->alignedSize = view.alignedSize; +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 485c94701..2e71316e9 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -168,7 +168,7 @@ class Dense : public Matrix< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment - //Dense& operator=( const Dense& matrix ); + Dense& operator=( const Dense& matrix ); // cross-device copy assignment template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > @@ -200,6 +200,8 @@ class Dense : public Matrix< Real, Device, Index > friend class DenseDeviceDependentCode< DeviceType >; SegmentsType segments; + + ViewType view; }; } // namespace Matrices diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 7e6f37948..b8fa96907 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -76,10 +76,7 @@ String Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getSerializationType() { - return String( "Matrices::Dense< " ) + - TNL::getSerializationType< RealType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator] >"; + return ViewType::getSerializationType(); } template< typename Real, @@ -108,6 +105,7 @@ setDimensions( const IndexType rows, this->segments.setSegmentsSizes( rows, columns ); this->values.setSize( rows * columns ); this->values = 0.0; + this->view = this->getView(); } template< typename Real, @@ -145,19 +143,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { - rowLengths.setSize( this->getRows() ); - rowLengths = 0; - auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { - return ( value != 0.0 ); - }; - auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; - }; - this->allRowsReduction( fetch, reduce, keep, 0 ); + this->view.getCompressedRowLengths( rowLengths ); } template< typename Real, @@ -197,11 +183,7 @@ template< typename Real, typename RealAllocator > Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const { - const auto values_view = this->values.getConstView(); - auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { - return ( values_view[ i ] != 0.0 ); - }; - return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); + return this->view.getNumberOfNonzeroMatrixElements(); } template< typename Real, @@ -221,7 +203,7 @@ template< typename Real, typename RealAllocator > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value ) { - this->values = value; + this->view.setValue( value ); } template< typename Real, @@ -233,8 +215,7 @@ __cuda_callable__ auto Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( 
const IndexType& rowIdx ) const -> const RowView { - TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); - return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); + return this->view.getRow( rowIdx ); } template< typename Real, @@ -246,8 +227,7 @@ __cuda_callable__ auto Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { - TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); - return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); + return this->view.getRow( rowIdx ); } template< typename Real, @@ -259,12 +239,7 @@ __cuda_callable__ Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, const IndexType column ) { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - return this->values.operator[]( this->getElementIndex( row, column ) ); + return this->view.operator()( row, column ); } template< typename Real, @@ -276,12 +251,7 @@ __cuda_callable__ const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, const IndexType column ) const { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." 
); - - return this->values.operator[]( this->getElementIndex( row, column ) ); + return this->view.operator()( row, column ); } template< typename Real, @@ -295,7 +265,7 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) { - this->values.setElement( this->getElementIndex( row, column ), value ); + this->view.setElement( row, column, value ); } template< typename Real, @@ -310,13 +280,7 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - const IndexType elementIndex = this->getElementIndex( row, column ); - if( thisElementMultiplicator == 1.0 ) - this->values.setElement( elementIndex, - this->values.getElement( elementIndex ) + value ); - else - this->values.setElement( elementIndex, - thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); + this->view.addElement( row, column, value, thisElementMultiplicator ); } template< typename Real, @@ -329,7 +293,7 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { - return this->values.getElement( this->getElementIndex( row, column ) ); + return this->view.getElement( row, column ); } template< typename Real, @@ -342,12 +306,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { - const auto values_view = this->values.getConstView(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { - return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); - return zero; - }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); } template< typename Real, @@ -373,12 +332,7 @@ void Dense< 
Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { - const auto values_view = this->values.getConstView(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); - return true; - }; - this->segments.forSegments( first, last, f ); + this->view.forRows( first, last, function ); } template< typename Real, @@ -391,12 +345,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { - auto values_view = this->values.getView(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); - return true; - }; - this->segments.forSegments( first, last, f ); + this->view.forRows( first, last, function ); } template< typename Real, @@ -435,11 +384,7 @@ __cuda_callable__ typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, const Vector& vector ) const { - RealType sum( 0.0 ); - // TODO: Fix this - //for( IndexType column = 0; column < this->getColumns(); column++ ) - // sum += this->getElementFast( row, column ) * vector[ column ]; - return sum; + return this->view.rowVectorProduct( row, vector ); } template< typename Real, @@ -453,27 +398,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << 
outVector.getSize() << std::endl ); - - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); - const auto inVectorView = inVector.getConstView(); - auto outVectorView = outVector.getView(); - const auto valuesView = this->values.getConstView(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { - return valuesView[ offset ] * inVectorView[ column ]; - }; - auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { - sum += value; - }; - auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - outVectorView[ row ] = value; - }; - this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + this->view.vectorProduct( inVector, outVector ); } template< typename Real, @@ -959,7 +884,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } -/*template< typename Real, +template< typename Real, typename Device, typename Index, bool RowMajorOrder, @@ -968,7 +893,9 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) { - const IndexType bufferRowsCount( 128 ); + setLike( matrix ); + this->values = matrix.values; + /*const IndexType bufferRowsCount( 128 ); const IndexType columns = this->getColumns(); const size_t bufferSize = bufferRowsCount * columns; Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); @@ -1000,8 +927,8 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } - return *this; -}*/ + return *this;*/ +} template< typename Real, typename Device, @@ -1101,7 +1028,7 @@ 
template< typename Real, typename RealAllocator > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const { - Object::save( fileName ); + this->view.save( fileName ); } template< typename Real, @@ -1121,7 +1048,7 @@ template< typename Real, typename RealAllocator > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { - Matrix< Real, Device, Index >::save( file ); + this->view.save( file ); } template< typename Real, @@ -1141,13 +1068,7 @@ template< typename Real, typename RealAllocator > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - for( IndexType column = 0; column < this->getColumns(); column++ ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; - str << std::endl; - } + this->view.print( str ); } template< typename Real, @@ -1156,8 +1077,9 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator > __cuda_callable__ -Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row, - const IndexType column ) const +Index +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElementIndex( const IndexType row, const IndexType column ) const { return this->segments.getGlobalIndex( row, column ); } diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 9bad424b2..d963dd7c3 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -171,22 +171,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; - // copy assignment DenseMatrixView& operator=( const DenseMatrixView& matrix ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 
>::type > - DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix ); - void save( const String& fileName ) const; - void load( const String& fileName ); - void save( File& file ) const; - void load( File& file ); - void print( std::ostream& str ) const; protected: diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 21f6d79ef..527915d55 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -83,7 +83,7 @@ getSerializationType() { return String( "Matrices::Dense< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + - TNL::getSerializationType< IndexType >() + + TNL::getSerializationType< IndexType >() + ", " + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; } @@ -321,7 +321,7 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, values_view[ globalIdx ] ); + function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); @@ -339,7 +339,7 @@ forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, values_view[ globalIdx ] ); + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); @@ -395,14 +395,22 @@ template< typename Real, void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << 
"Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); - - //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { + return valuesView[ offset ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, @@ -885,39 +893,18 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( } -// copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder > DenseMatrixView< Real, Device, Index, RowMajorOrder >& -DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const DenseMatrixView& matrix ) -{ - this->setLike( matrix ); - this->values = matrix.values; - return *this; -} - -// cross-device copy assignment -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > - template< typename Real2, typename Device2, typename Index2, typename > -DenseMatrixView< Real, Device, Index, RowMajorOrder >& -DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator=( const 
DenseMatrixView< Real2, Device2, Index2 >& matrix ) +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const DenseMatrixView& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, - "unknown device" ); - - this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); + MatrixView< Real, Device, Index >::operator=( matrix ); + this->segments = matrix.segments; } - template< typename Real, typename Device, typename Index, @@ -927,15 +914,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& Object::save( fileName ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( const String& fileName ) -{ - Object::load( fileName ); -} - template< typename Real, typename Device, typename Index, @@ -945,15 +923,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) c MatrixView< Real, Device, Index >::save( file ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::load( File& file ) -{ - MatrixView< Real, Device, Index >::load( file ); -} - template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 0f7158010..a3e7e8f61 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -1234,9 +1234,7 @@ void test_SaveAndLoad() ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( rows, cols ); + Matrix loadedMatrix( 
rows, cols ); ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); -- GitLab From 990b916aad57fafe6998c60df19b2285b97b83db Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 17 Jan 2020 14:53:56 +0100 Subject: [PATCH 099/179] Forwarding SparseMatrix calls to SparseMatrixView. --- src/TNL/Containers/Segments/CSRView.h | 3 + src/TNL/Containers/Segments/CSRView.hpp | 15 +- src/TNL/Containers/Segments/EllpackView.hpp | 12 +- .../Containers/Segments/SlicedEllpackView.h | 2 + .../Containers/Segments/SlicedEllpackView.hpp | 27 ++- src/TNL/Matrices/Dense.hpp | 6 +- src/TNL/Matrices/DenseMatrixView.hpp | 10 +- src/TNL/Matrices/SparseMatrix.h | 16 +- src/TNL/Matrices/SparseMatrix.hpp | 155 ++++-------------- src/TNL/Matrices/SparseMatrixView.h | 2 + src/TNL/Matrices/SparseMatrixView.hpp | 82 +++++---- src/UnitTests/Matrices/Legacy/CMakeLists.txt | 12 +- .../Matrices/Legacy/SparseMatrixCopyTest.h | 36 ++-- 13 files changed, 174 insertions(+), 204 deletions(-) diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index a0f5cd200..2ad849f97 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -41,6 +41,7 @@ class CSRView __cuda_callable__ CSRView( const OffsetsView& offsets ); + __cuda_callable__ CSRView( const OffsetsView&& offsets ); __cuda_callable__ @@ -110,6 +111,8 @@ class CSRView template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + CSRView& operator=( const CSRView& view ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 2d2b58331..cc4d16fe6 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -171,9 +171,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType begin = offsetsView[ segmentIdx ]; const IndexType end = offsetsView[ segmentIdx + 1 ]; IndexType localIdx( 0 ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) - if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) - break; + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } @@ -220,6 +220,15 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); } +template< typename Device, + typename Index > +CSRView< Device, Index >& +CSRView< Device, Index >:: +operator=( const CSRView& view ) +{ + this->offsets.copy( view.offsets ); +} + template< typename Device, typename Index > void diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index 719a349a9..c0d0b3721 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -202,9 +202,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType begin = segmentIdx * segmentSize; const IndexType end = begin + segmentSize; IndexType localIdx( 0 ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) - if( ! f( segmentIdx, localIdx++, globalIdx, args... 
) ) - break; + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } @@ -216,9 +216,9 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType begin = segmentIdx; const IndexType end = storageSize; IndexType localIdx( 0 ); - for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize ) - if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) - break; + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += alignedSize ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 86745e7c0..c8c73c3f2 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -106,6 +106,8 @@ class SlicedEllpackView template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + SlicedEllpackView& operator=( const SlicedEllpackView& view ); + void save( File& file ) const; void load( File& file ); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 5f9cbdee3..98a3d9b81 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -14,6 +14,8 @@ #include #include +#include "SlicedEllpackView.h" + namespace TNL { namespace Containers { namespace Segments { @@ -240,7 +242,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... 
args ) const const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) { - auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; @@ -249,14 +251,13 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) - if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) ) - break; + f( segmentIdx, localIdx++, globalIdx, compute, args... ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } else { - auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; const IndexType segmentInSliceIdx = segmentIdx % SliceSize; const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; @@ -265,8 +266,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const IndexType localIdx( 0 ); bool compute( true ); for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) - if( ! f( segmentIdx, localIdx++, globalIdx, compute, args... ) ) - break; + f( segmentIdx, localIdx++, globalIdx, compute, args... ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); } @@ -344,6 +344,21 @@ allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Re this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); } +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& view ) +{ + this->size = view.size; + this->alignedSize = view.alignedSize; + this->segmentsCount = view.segmentsCount; + this->sliceOffsets.copy( view.sliceOffsets ); + this->sliceSegmentSizes.copy( view.sliceSegmentSizes ); +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index b8fa96907..5504f6408 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -948,7 +948,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ > if( std::is_same< DeviceType, Device_ >::value ) { auto this_view = this->getView(); - auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { this_view.getRow( rowIdx ).setElement( columnIdx, value ); }; matrix.forAllRows( f ); @@ -971,7 +971,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ > //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable { + auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; sourceValuesBuffer_view[ bufferIdx ] = value; }; @@ -981,7 +981,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ > //// // Copy buffer to this matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType 
columnIdx, IndexType globalIdx, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; value = destinationValuesBuffer_view[ bufferIdx ]; }; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 527915d55..890606436 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -320,9 +320,8 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ] ); - return true; + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { + function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute ); }; this->segments.forSegments( first, last, f ); @@ -338,9 +337,8 @@ DenseMatrixView< Real, Device, Index, RowMajorOrder >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); - return true; + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute ); }; this->segments.forSegments( first, last, f ); diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 44883a124..43ea25bf5 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ 
b/src/TNL/Matrices/SparseMatrix.h @@ -108,11 +108,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ RowView getRow( const IndexType& rowIdx ); - bool setElement( const IndexType row, + void setElement( const IndexType row, const IndexType column, const RealType& value ); - bool addElement( const IndexType row, + void addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ); @@ -172,7 +172,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Assignment of exactly the same matrix type. * @param matrix - * @return + * @return */ SparseMatrix& operator=( const SparseMatrix& matrix ); @@ -181,12 +181,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > */ template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ); - - + + /** * \brief Assignment of any other matrix type. 
* @param matrix - * @return + * @return */ template< typename RHSMatrix > SparseMatrix& operator=( const RHSMatrix& matrix ); @@ -213,7 +213,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > IndexAllocator indexAllocator; - RealAllocator realAllocator; + //RealAllocator realAllocator; + + ViewType view; }; } // namespace Conatiners diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 3f5636bb6..c94506084 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -84,7 +84,7 @@ auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getView() -> ViewType { - return ViewType( this->getRows(), + return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), this->columnIndexes.getView(), @@ -166,6 +166,7 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities ) this->values = ( RealType ) 0; this->columnIndexes.setSize( this->segments.getStorageSize() ); this->columnIndexes = this->getPaddingIndex(); + this->view = this->getView(); } template< typename Real, @@ -180,19 +181,7 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { - rowLengths.setSize( this->getRows() ); - rowLengths = 0; - auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { - return ( value != 0.0 ); - }; - auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; - }; - this->allRowsReduction( fetch, reduce, keep, 0 ); + this->view.getCompressedRowLengths( rowLengths ); } template< typename Real, @@ -221,12 +210,7 @@ Index SparseMatrix< Real, Device, 
Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getNumberOfNonzeroMatrixElements() const { - const auto columns_view = this->columnIndexes.getConstView(); - const IndexType paddingIndex = this->getPaddingIndex(); - auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { - return ( columns_view[ i ] != paddingIndex ); - }; - return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + this->view.getNumberOfNonzeroMatrixElements(); } template< typename Real, @@ -254,8 +238,7 @@ __cuda_callable__ auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { - TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); - return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); + return this->view.getRow( rowIdx ); } template< typename Real, @@ -269,8 +252,7 @@ __cuda_callable__ auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { - TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); - return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); + return this->view.getRow( rowIdx ); } template< typename Real, @@ -280,13 +262,13 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -bool +void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { - return this->addElement( row, column, value, 0.0 ); + this->view.setElement( row, column, value ); } template< typename Real, @@ -296,63 +278,14 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > -bool +void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator ) { - TNL_ASSERT( row >= 0 && row < this->rows && - column >= 0 && column < this->columns, - std::cerr << " row = " << row - << " column = " << column - << " this->rows = " << this->rows - << " this->columns = " << this->columns ); - - const IndexType rowSize = this->segments.getSegmentSize( row ); - IndexType col( this->getPaddingIndex() ); - IndexType i; - IndexType globalIdx; - for( i = 0; i < rowSize; i++ ) - { - globalIdx = this->segments.getGlobalIndex( row, i ); - TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); - col = this->columnIndexes.getElement( globalIdx ); - if( col == column ) - { - this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); - return true; - } - if( col == this->getPaddingIndex() || col > column ) - break; - } - if( i == rowSize ) - return false; - if( col == this->getPaddingIndex() ) - { - this->columnIndexes.setElement( globalIdx, column ); - 
this->values.setElement( globalIdx, value ); - return true; - } - else - { - IndexType j = rowSize - 1; - while( j > i ) - { - const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); - const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); - TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); - TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); - this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); - this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); - j--; - } - - this->columnIndexes.setElement( globalIdx, column ); - this->values.setElement( globalIdx, value ); - return true; - } + this->view.addElement( row, column, value, thisElementMultiplicator ); } template< typename Real, @@ -367,16 +300,7 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll getElement( const IndexType row, const IndexType column ) const { - const IndexType rowSize = this->segments.getSegmentSize( row ); - for( IndexType i = 0; i < rowSize; i++ ) - { - const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); - TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); - const IndexType col = this->columnIndexes.getElement( globalIdx ); - if( col == column ) - return this->values.getElement( globalIdx ); - } - return 0.0; + return this->view.getElement( row, column ); } template< typename Real, @@ -393,7 +317,7 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll rowVectorProduct( const IndexType row, const Vector& vector ) const { - + this->view.rowVectorProduct( row, vector ); } template< typename Real, @@ -412,7 +336,8 @@ vectorProduct( const InVector& inVector, const RealType& matrixMultiplicator, const RealType& inVectorAddition ) const { - TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." 
); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, inVectorAddition ); + /*TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); const auto inVectorView = inVector.getConstView(); @@ -433,7 +358,7 @@ vectorProduct( const InVector& inVector, auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; }; - this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );*/ } template< typename Real, @@ -448,7 +373,8 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { - const auto columns_view = this->columnIndexes.getConstView(); + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); + /*const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { @@ -457,7 +383,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return zero; }; - this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/ } template< typename Real, @@ -487,7 +413,8 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, 
IndexAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { - const auto columns_view = this->columnIndexes.getConstView(); + this->view.forRows( first, last, function ); + /*const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { @@ -495,7 +422,7 @@ forRows( IndexType first, IndexType last, Function& function ) const return true; }; this->segments.forSegments( first, last, f ); - + */ } template< typename Real, @@ -510,14 +437,15 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { - auto columns_view = this->columnIndexes.getView(); + this->view.forRows( first, last, function ); + /*auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] ); return true; }; - this->segments.forSegments( first, last, f ); + this->segments.forSegments( first, last, f );*/ } template< typename Real, @@ -653,7 +581,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value ) mutable { + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value, bool& compute ) mutable { if( value != 0.0 ) { IndexType 
thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ ); @@ -684,7 +612,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixValuesBuffer_view[ bufferIdx ] = value; }; @@ -697,7 +625,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& //// // Copy matrix elements from the buffer to the matrix const IndexType matrix_columns = this->getColumns(); - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] ); while( inValue == 0.0 && column < matrix_columns ) @@ -723,7 +651,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& //std::cerr << "This matrix = " << std::endl << *this << std::endl; } return *this; - + } template< typename Real, @@ -759,7 +687,7 @@ operator=( const RHSMatrix& matrix ) if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { if( columnIndex != paddingIndex ) { 
IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); @@ -793,7 +721,7 @@ operator=( const RHSMatrix& matrix ) //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { if( columnIndex != paddingIndex ) { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; @@ -810,7 +738,7 @@ operator=( const RHSMatrix& matrix ) //// // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; if( column != paddingIndex ) @@ -819,7 +747,7 @@ operator=( const RHSMatrix& matrix ) value = thisValuesBuffer_view[ bufferIdx ]; } }; - this->forRows( baseRow, lastRow, f2 ); + //this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } //std::cerr << "This matrix = " << std::endl << *this << std::endl; @@ -838,9 +766,7 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: save( File& file ) const { - Matrix< RealType, DeviceType, IndexType >::save( file ); - file << this->columnIndexes; - this->segments.save( file ); + this->view.save( file ); } template< typename Real, @@ -898,20 +824,7 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - const 
IndexType rowLength = this->segments.getSegmentSize( row ); - for( IndexType i = 0; i < rowLength; i++ ) - { - const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); - const IndexType column = this->columnIndexes.getElement( globalIdx ); - if( column == this->getPaddingIndex() ) - break; - str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t"; - } - str << std::endl; - } + this->view.print( str ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index aba3b4642..7168e1e8e 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -143,6 +143,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; + SparseMatrixView& operator=( const SparseMatrixView& matrix ); + void save( File& file ) const; void save( const String& fileName ) const; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 3b192b4e9..055a1d60e 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -118,7 +118,7 @@ getCompressedRowLengths( Vector& rowLengths ) const rowLengths.setSize( this->getRows() ); rowLengths = 0; auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { @@ -159,20 +159,6 @@ getNumberOfNonzeroMatrixElements() const return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); } -template< typename Real, - typename Device, - typename Index, - typename MatrixType, - template< typename, typename > class SegmentsView > -void 
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -reset() -{ - Matrix< Real, Device, Index >::reset(); - this->columnIndexes.reset(); - -} - template< typename Real, typename Device, typename Index, @@ -225,12 +211,10 @@ addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - TNL_ASSERT( row >= 0 && row < this->rows && - column >= 0 && column < this->columns, - std::cerr << " row = " << row - << " column = " << column - << " this->rows = " << this->rows - << " this->columns = " << this->columns ); + TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." ); + TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." ); const IndexType rowSize = this->segments.getSegmentSize( row ); IndexType col( this->getPaddingIndex() ); @@ -291,6 +275,11 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: getElement( const IndexType row, const IndexType column ) const { + TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." ); + TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." ); + const IndexType rowSize = this->segments.getSegmentSize( row ); for( IndexType i = 0; i < rowSize; i++ ) { @@ -332,11 +321,34 @@ vectorProduct( const InVector& inVector, const RealType& matrixMultiplicator, const RealType& inVectorAddition ) const { + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." 
); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); + const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + const IndexType column = columnIndexesView[ globalIdx ]; + compute = ( column != paddingIndex ); + if( ! compute ) + return 0.0; + return valuesView[ globalIdx ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + + /*const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ offset ]; compute = ( column != paddingIndex ); @@ -351,6 +363,7 @@ vectorProduct( const InVector& inVector, outVectorView[ row ] = value; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + */ } template< typename Real, @@ -366,10 +379,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = 
this->getPaddingIndex(); - auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType globalIdx ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) { + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) - return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return zero; }; this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); @@ -401,8 +414,8 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto columns_view = this->columnIndexes.getConstView(); const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, localIdx, globalIdx ); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool { + function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute ); return true; }; this->segments.forSegments( first, last, f ); @@ -421,9 +434,8 @@ forRows( IndexType first, IndexType last, Function& function ) auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); const IndexType paddingIndex_ = this->getPaddingIndex(); - auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { - function( rowIdx, localIdx, globalIdx ); - return true; + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { + function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute ); }; 
this->segments.forSegments( first, last, f ); } @@ -501,6 +513,20 @@ performSORIteration( const Vector1& b, return false; } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix ) +{ + MatrixView< Real, Device, Index >::operator=( matrix ); + this->columnIndexes.copy( matrix.columnIndexes ); + this->segments = matrix.segments; +} + template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt index 4320b6c1f..46c6be2cd 100644 --- a/src/UnitTests/Matrices/Legacy/CMakeLists.txt +++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt @@ -1,6 +1,6 @@ IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + #CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -24,9 +24,9 @@ IF( BUILD_CUDA ) TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) - TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + #ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + #TARGET_COMPILE_OPTIONS( 
Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -59,7 +59,7 @@ ELSE( BUILD_CUDA ) ENDIF( BUILD_CUDA ) -ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h index 7069fd777..aece39d9a 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h @@ -18,14 +18,14 @@ #include #include -/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; +using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; -using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/ +using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >; -template< typename Device, typename Index, typename IndexAllocator > +/*template< typename Device, typename Index, typename IndexAllocator > using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, 
IndexAllocator >; template< typename Device, typename Index, typename IndexAllocator > @@ -36,10 +36,10 @@ using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL: using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; -using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;*/ -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include /* @@ -98,7 +98,7 @@ void setupUnevenRowSizeMatrix( Matrix& m ) m.setElement( 7, 0, value++ ); // 7th row - for( int i = 0; i < cols - 1; i++ ) // 8th row + for( int i = 0; i < cols - 1; i++ ) // 8th row m.setElement( 8, i, value++ ); m.setElement( 9, 5, value++ ); // 9th row @@ -158,21 +158,21 @@ void checkUnevenRowSizeMatrix( Matrix& m ) EXPECT_EQ( m.getElement( 6, 3 ), 0 ); EXPECT_EQ( m.getElement( 6, 4 ), 0 ); EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); EXPECT_EQ( m.getElement( 7, 1 ), 0 ); EXPECT_EQ( m.getElement( 7, 2 ), 0 ); EXPECT_EQ( m.getElement( 7, 3 ), 0 ); EXPECT_EQ( m.getElement( 7, 4 ), 0 ); EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); EXPECT_EQ( m.getElement( 8, 1 ), 24 ); EXPECT_EQ( m.getElement( 8, 2 ), 25 ); EXPECT_EQ( m.getElement( 8, 3 ), 26 ); EXPECT_EQ( m.getElement( 8, 4 ), 27 ); EXPECT_EQ( m.getElement( 8, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); EXPECT_EQ( m.getElement( 9, 1 ), 0 ); EXPECT_EQ( m.getElement( 9, 2 ), 0 ); @@ -205,7 +205,7 @@ void setupAntiTriDiagMatrix( Matrix& m ) 
rowLengths.setElement( 0, 4); rowLengths.setElement( 1, 4 ); m.setCompressedRowLengths( rowLengths ); - + int value = 1; for( int i = 0; i < rows; i++ ) for( int j = cols - 1; j > 2; j-- ) @@ -396,39 +396,39 @@ void testCopyAssignment() template< typename Matrix1, typename Matrix2 > void testConversion() { - + { SCOPED_TRACE("Tri Diagonal Matrix"); - + Matrix1 triDiag1; setupTriDiagMatrix( triDiag1 ); checkTriDiagMatrix( triDiag1 ); - + Matrix2 triDiag2; //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); triDiag2 = triDiag1; checkTriDiagMatrix( triDiag2 ); } - + { SCOPED_TRACE("Anti Tri Diagonal Matrix"); - + Matrix1 antiTriDiag1; setupAntiTriDiagMatrix( antiTriDiag1 ); checkAntiTriDiagMatrix( antiTriDiag1 ); - + Matrix2 antiTriDiag2; //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); antiTriDiag2 = antiTriDiag1; checkAntiTriDiagMatrix( antiTriDiag2 ); } - + { SCOPED_TRACE("Uneven Row Size Matrix"); Matrix1 unevenRowSize1; setupUnevenRowSizeMatrix( unevenRowSize1 ); checkUnevenRowSizeMatrix( unevenRowSize1 ); - + Matrix2 unevenRowSize2; //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); unevenRowSize2 = unevenRowSize1; -- GitLab From 5927f064b6b9f06fa6c1e434d6bd1488c35966a4 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 17 Jan 2020 15:51:31 +0100 Subject: [PATCH 100/179] Fixed sparse matrix assignment operator. 
--- src/TNL/Matrices/Dense.hpp | 1 + src/TNL/Matrices/SparseMatrix.hpp | 12 ++++---- src/TNL/Matrices/SparseMatrixView.hpp | 4 +-- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 28 +++++++++---------- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 5504f6408..21ae1bce6 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -990,6 +990,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ > } } } + this->view = this->getView(); return *this; } diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index c94506084..14495ad3d 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -542,6 +542,7 @@ operator=( const SparseMatrix& matrix ) Matrix< Real, Device, Index >::operator=( matrix ); this->columnIndexes = matrix.columnIndexes; this->segments = matrix.segments; + this->view = this->getView(); return *this; } @@ -581,7 +582,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType columnIdx, RHSIndexType globalIndex, const RHSRealType& value, bool& compute ) mutable { + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { if( value != 0.0 ) { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ ); @@ -650,6 +651,7 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& } //std::cerr << "This matrix = " << std::endl << *this << std::endl; } + this->view = this->getView(); return *this; } @@ -684,7 +686,7 @@ operator=( const RHSMatrix& matrix ) auto values_view = this->values.getView(); columns_view = 
paddingIndex; - if( std::is_same< DeviceType, RHSDeviceType >::value ) + /*if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { @@ -697,7 +699,7 @@ operator=( const RHSMatrix& matrix ) }; matrix.forAllRows( f ); } - else + else*/ { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); @@ -747,11 +749,11 @@ operator=( const RHSMatrix& matrix ) value = thisValuesBuffer_view[ bufferIdx ]; } }; - //this->forRows( baseRow, lastRow, f2 ); + this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; } - //std::cerr << "This matrix = " << std::endl << *this << std::endl; } + this->view = this->getView(); return *this; } diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 055a1d60e..965a51b8b 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -415,7 +415,7 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool { - function( rowIdx, localIdx, columns_view[ globalIdx ], globalIdx, compute ); + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); return true; }; this->segments.forSegments( first, last, f ); @@ -435,7 +435,7 @@ forRows( IndexType first, IndexType last, Function& function ) auto values_view = this->values.getView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { - function( rowIdx, localIdx, columns_view[ 
globalIdx ], globalIdx, compute ); + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); }; this->segments.forSegments( first, last, f ); } diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 8677443b2..e9898bb39 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -40,7 +40,7 @@ using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL: using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include /* @@ -99,7 +99,7 @@ void setupUnevenRowSizeMatrix( Matrix& m ) m.setElement( 7, 0, value++ ); // 7th row - for( int i = 0; i < cols - 1; i++ ) // 8th row + for( int i = 0; i < cols - 1; i++ ) // 8th row m.setElement( 8, i, value++ ); m.setElement( 9, 5, value++ ); // 9th row @@ -159,21 +159,21 @@ void checkUnevenRowSizeMatrix( Matrix& m ) EXPECT_EQ( m.getElement( 6, 3 ), 0 ); EXPECT_EQ( m.getElement( 6, 4 ), 0 ); EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); EXPECT_EQ( m.getElement( 7, 1 ), 0 ); EXPECT_EQ( m.getElement( 7, 2 ), 0 ); EXPECT_EQ( m.getElement( 7, 3 ), 0 ); EXPECT_EQ( m.getElement( 7, 4 ), 0 ); EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); EXPECT_EQ( m.getElement( 8, 1 ), 24 ); EXPECT_EQ( m.getElement( 8, 2 ), 25 ); EXPECT_EQ( m.getElement( 8, 3 ), 26 ); EXPECT_EQ( m.getElement( 8, 4 ), 27 ); EXPECT_EQ( m.getElement( 8, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); EXPECT_EQ( m.getElement( 9, 1 ), 0 ); EXPECT_EQ( m.getElement( 9, 2 ), 0 ); @@ -206,7 +206,7 @@ void setupAntiTriDiagMatrix( Matrix& m ) rowLengths.setElement( 0, 4); rowLengths.setElement( 1, 4 ); m.setCompressedRowLengths( rowLengths ); - + int value = 1; for( int i = 0; i < rows; i++ ) for( int j = cols - 1; j > 2; j-- ) @@ 
-397,39 +397,37 @@ void testCopyAssignment() template< typename Matrix1, typename Matrix2 > void testConversion() { - { SCOPED_TRACE("Tri Diagonal Matrix"); - + Matrix1 triDiag1; setupTriDiagMatrix( triDiag1 ); checkTriDiagMatrix( triDiag1 ); - + Matrix2 triDiag2; - //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); triDiag2 = triDiag1; checkTriDiagMatrix( triDiag2 ); } - + { SCOPED_TRACE("Anti Tri Diagonal Matrix"); - + Matrix1 antiTriDiag1; setupAntiTriDiagMatrix( antiTriDiag1 ); checkAntiTriDiagMatrix( antiTriDiag1 ); - + Matrix2 antiTriDiag2; //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); antiTriDiag2 = antiTriDiag1; checkAntiTriDiagMatrix( antiTriDiag2 ); } - + { SCOPED_TRACE("Uneven Row Size Matrix"); Matrix1 unevenRowSize1; setupUnevenRowSizeMatrix( unevenRowSize1 ); checkUnevenRowSizeMatrix( unevenRowSize1 ); - + Matrix2 unevenRowSize2; //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); unevenRowSize2 = unevenRowSize1; -- GitLab From c2514bc6b8b76a13d73f947ae5d7a67d4d7204c5 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 17 Jan 2020 17:40:18 +0100 Subject: [PATCH 101/179] Debuging assignment of tridiagonal matrix to sparse matrix. 
--- src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 59 +++++++--- src/TNL/Matrices/TridiagonalMatrixRowView.hpp | 2 +- src/TNL/Matrices/TridiagonalMatrixView.h | 2 + src/TNL/Matrices/TridiagonalMatrixView.hpp | 17 +-- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 105 +++++++++++++++++- 6 files changed, 155 insertions(+), 32 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 43ea25bf5..26d5d2d84 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); - ViewType getView(); + ViewType getView() const; // TODO: remove const ConstViewType getConstView() const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 14495ad3d..6d3d9d8b8 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -82,13 +82,13 @@ template< typename Real, typename IndexAllocator > auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -getView() -> ViewType +getView() const -> ViewType { return ViewType( this->getRows(), this->getColumns(), - this->getValues().getView(), - this->columnIndexes.getView(), - this->segments.getView() ); + const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast + const_cast< SparseMatrix* >( this )->columnIndexes.getView(), + const_cast< SparseMatrix* >( this )->segments.getView() ); } template< typename Real, @@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& thisValuesBuffer_view = matrixValuesBuffer_view; //// - // Copy matrix elements from the buffer to the matrix + // Copy matrix elements from the buffer to the matrix and ignoring + // zero matrix elements. 
const IndexType matrix_columns = this->getColumns(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); @@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix ) using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; - using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; + using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >; - typename RHSMatrix::RowsCapacitiesType rowLengths; + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); this->setCompressedRowLengths( rowLengths ); + Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); + rowLocalIndexes = 0; + // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); + auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; - /*if( std::is_same< DeviceType, RHSDeviceType >::value ) + if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); - auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + RealType inValue( 0.0 ); + IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); if( columnIndex != paddingIndex ) { - 
IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); columns_view[ thisGlobalIdx ] = columnIndex; values_view[ thisGlobalIdx ] = value; + rowLocalIndexes_view[ rowIdx ] = localIdx; } }; matrix.forAllRows( f ); } - else*/ + else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); @@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix ) thisColumnsBuffer_view = matrixColumnsBuffer_view; //// - // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; - const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; - if( column != paddingIndex ) + // Copy matrix elements from the buffer to the matrix and ignoring + // zero matrix elements + const IndexType matrix_columns = this->getColumns(); + auto matrix_view = matrix.getView(); + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable { + RealType inValue( 0.0 ); + IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] ); + auto matrixRow = matrix_view.getRow( rowIdx ); + while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns ) { - columnIndex = column; - value = thisValuesBuffer_view[ bufferIdx ]; + bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++; + inValue = thisValuesBuffer_view[ bufferIdx ]; + } + rowLocalIndexes_view[ rowIdx ] = localIdx; + if( inValue == 0.0 ) + { + columnIndex = paddingIndex; + value = 0.0; + } + else + { + columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; + value = inValue; } }; this->forRows( baseRow, lastRow, f2 ); diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp 
b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp index ba60876b9..80fc1a26d 100644 --- a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp @@ -29,7 +29,7 @@ auto TridiagonalMatrixRowView< ValuesView, Indexer >:: getSize() const -> IndexType { - return indexer.getRowSize(); + return indexer.getRowSize( rowIdx ); } template< typename ValuesView, typename Indexer > diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 7db517cbd..61b005c5a 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index d8fa6061c..7fc5fd6b7 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); const auto indexer = this->indexer; + bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { if( rowIdx == 0 ) { - function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); - function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); } else if( rowIdx + 1 < indexer.getColumns() ) { - function( rowIdx, 0, rowIdx - 1, 
values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); - function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute ); } else if( rowIdx < indexer.getColumns() ) { - function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); - function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); } else - function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index e9898bb39..b285f5e05 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -370,6 +372,7 @@ void testCopyAssignment() Matrix2 triDiag2; triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); checkTriDiagMatrix( triDiag2 ); } { @@ -390,6 +393,7 @@ void testCopyAssignment() Matrix2 unevenRowSize2; unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); } } @@ -435,6 +439,62 @@ void testConversion() } } +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using 
DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ ) + hostMatrix.setElement( i, j, i + j ); + + std::cerr << hostMatrix << std::endl; + Matrix matrix; + matrix = hostMatrix; + std::cerr << matrix << std::endl; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + /*for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ ) + cudaMatrix.setElement( i, j, i + j );*/ + + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif + +} + template< typename Matrix > void denseMatrixAssignment() { @@ -469,10 +529,10 @@ void denseMatrixAssignment() #ifdef HAVE_CUDA DenseCuda cudaMatrix( rows, columns ); - //cudaMatrix = hostMatrix; - for( IndexType i = 0; i < columns; i++ ) + 
cudaMatrix = hostMatrix; + /*for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) - cudaMatrix.setElement( i, j, i + j ); + cudaMatrix.setElement( i, j, i + j );*/ matrix = cudaMatrix; matrix.getCompressedRowLengths( rowCapacities ); @@ -487,7 +547,7 @@ void denseMatrixAssignment() } #endif } - +/* TEST( SparseMatrixCopyTest, CSR_HostToHost ) { testCopyAssignment< CSR_host, CSR_host >(); @@ -619,6 +679,43 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) testConversion< SE_cuda, E_cuda >(); } #endif +*/ + +//// +// Tridiagonal matrix assignment test +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host ) +{ + tridiagonalMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host ) +{ + tridiagonalMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + tridiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda ) +{ + tridiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + tridiagonalMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + tridiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + + // Dense matrix assignment test TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) -- GitLab From 733401dcfd44ac508f43d1f5731bfc795226ddf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 17 Jan 2020 22:51:01 +0100 Subject: [PATCH 102/179] Fixed tridiagonal to sparse matrix assignment. Debugging multidiagonal to sparse matrix assignment. 
--- .../Matrices/MultidiagonalMatrixRowView.hpp | 2 +- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 3 +- src/TNL/Matrices/SparseMatrix.hpp | 3 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 117 ++++++++++++++---- 4 files changed, 98 insertions(+), 27 deletions(-) diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp index 88aae3f15..855b8463a 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp @@ -30,7 +30,7 @@ auto MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: getSize() const -> IndexType { - return indexer.getRowSize(); + return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx ); } template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 2839c997a..96312d03c 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -393,12 +393,13 @@ forRows( IndexType first, IndexType last, Function& function ) const const IndexType diagonalsCount = this->diagonalsShifts.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; + bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) { const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) - function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ); + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 6d3d9d8b8..66813a732 100644 --- 
a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -682,7 +682,6 @@ operator=( const RHSMatrix& matrix ) Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); rowLocalIndexes = 0; - // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); @@ -697,7 +696,7 @@ operator=( const RHSMatrix& matrix ) auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); - if( columnIndex != paddingIndex ) + if( value != 0.0 && columnIndex != paddingIndex ) { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); columns_view[ thisGlobalIdx ] = columnIndex; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index b285f5e05..7f29191b6 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -421,7 +421,6 @@ void testConversion() checkAntiTriDiagMatrix( antiTriDiag1 ); Matrix2 antiTriDiag2; - //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); antiTriDiag2 = antiTriDiag1; checkAntiTriDiagMatrix( antiTriDiag2 ); } @@ -433,7 +432,6 @@ void testConversion() checkUnevenRowSizeMatrix( unevenRowSize1 ); Matrix2 unevenRowSize2; - //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); unevenRowSize2 = unevenRowSize1; checkUnevenRowSizeMatrix( unevenRowSize2 ); } @@ -451,21 +449,19 @@ void tridiagonalMatrixAssignment() const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( rows, i + 2 ); j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 
TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) hostMatrix.setElement( i, j, i + j ); - std::cerr << hostMatrix << std::endl; Matrix matrix; matrix = hostMatrix; - std::cerr << matrix << std::endl; using RowCapacitiesType = typename Matrix::RowsCapacitiesType; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); - RowCapacitiesType exactRowLengths{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; EXPECT_EQ( rowCapacities, exactRowLengths ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j < rows; j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) { if( abs( i - j ) > 1 ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); @@ -476,15 +472,11 @@ void tridiagonalMatrixAssignment() #ifdef HAVE_CUDA TridiagonalCuda cudaMatrix( rows, columns ); cudaMatrix = hostMatrix; - /*for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( row, i + 1 ); j++ ) - cudaMatrix.setElement( i, j, i + j );*/ - matrix = cudaMatrix; matrix.getCompressedRowLengths( rowCapacities ); EXPECT_EQ( rowCapacities, exactRowLengths ); - for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j < rows; j++ ) + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) { if( abs( i - j ) > 1 ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); @@ -492,7 +484,58 @@ void tridiagonalMatrixAssignment() EXPECT_EQ( matrix.getElement( i, j ), i + j ); } #endif +} +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = 
typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( i - j ) ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( i - j ) ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif } template< typename Matrix > @@ -530,10 +573,6 @@ void denseMatrixAssignment() #ifdef HAVE_CUDA DenseCuda cudaMatrix( rows, columns ); cudaMatrix = hostMatrix; - /*for( IndexType i = 0; i < columns; i++ ) - for( IndexType j = 0; j <= i; j++ ) - cudaMatrix.setElement( i, j, i + j );*/ - matrix = cudaMatrix; matrix.getCompressedRowLengths( rowCapacities ); EXPECT_EQ( rowCapacities, exactRowLengths ); @@ -547,7 +586,7 @@ void denseMatrixAssignment() } #endif } -/* + TEST( SparseMatrixCopyTest, CSR_HostToHost ) { testCopyAssignment< CSR_host, CSR_host >(); @@ -616,8 +655,8 @@ TEST( 
SparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) } #endif - -// test conversion between formats +//// +// Test of conversion between formats TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host ) { testConversion< CSR_host, E_host >(); @@ -679,7 +718,6 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) testConversion< SE_cuda, E_cuda >(); } #endif -*/ //// // Tridiagonal matrix assignment test @@ -715,8 +753,41 @@ TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda ) } #endif // HAVE_CUDA +//// +// Multidiagonal matrix assignment test +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host ) +{ + multidiagonalMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host ) +{ + multidiagonalMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + multidiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda ) +{ + multidiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + multidiagonalMatrixAssignment< E_cuda >(); +} +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + multidiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA +//// // Dense matrix assignment test TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) { -- GitLab From 27cb1cfa5e00fe44807e0626845ff147c6400a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 18 Jan 2020 10:45:02 +0100 Subject: [PATCH 103/179] Fixed multidiagonal to sparse matrix assignment. 
--- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 7f29191b6..6e1427ad1 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -502,7 +502,7 @@ void multidiagonalMatrixAssignment() MultidiagonalHost hostMatrix( rows, columns, diagonals ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) - if( diagonals.containsValue( i - j ) ) + if( diagonals.containsValue( j - i ) ) hostMatrix.setElement( i, j, i + j ); Matrix matrix; @@ -510,15 +510,15 @@ void multidiagonalMatrixAssignment() using RowCapacitiesType = typename Matrix::RowsCapacitiesType; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); - RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; EXPECT_EQ( rowCapacities, exactRowLengths ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) { - if( diagonals.containsValue( i - j ) ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else + if( diagonals.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } #ifdef HAVE_CUDA @@ -530,10 +530,10 @@ void multidiagonalMatrixAssignment() for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) { - if( diagonals.containsValue( i - j ) > 1 ) - EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); - else + if( diagonals.containsValue( j - i ) ) EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } #endif } -- GitLab From ba7b368151559d89f5eccb9e604e3e9d8314e0fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 19 Jan 2020 20:33:19 +0100 Subject: [PATCH 104/179] 
Added command line argument --with-cxx-flags to build script. --- CMakeLists.txt | 2 +- build | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68252ba6a..7d1666163 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options -set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) +set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WITH_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode diff --git a/build b/build index 67492f02c..9deb12d10 100755 --- a/build +++ b/build @@ -62,6 +62,7 @@ do --with-tools=* ) WITH_TOOLS="${option#*=}" ;; --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; --with-python=* ) WITH_PYTHON="${option#*=}" ;; + --with-cxx-flags=* ) WITH_CXX_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." exit 1 ;; @@ -91,6 +92,7 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." + echo " --with-cxx-flags=FLAGS Additional flags for C++ compiler." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." 
@@ -142,6 +144,7 @@ cmake_command=( -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} + -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS} -DDCMTK_DIR=${DCMTK_DIR} ) -- GitLab From 3bfc83cce078241e3b656ebd06dcca10f85db09e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 19 Jan 2020 20:33:43 +0100 Subject: [PATCH 105/179] Fixing MultidiagonalMatrix. --- src/TNL/Matrices/Multidiagonal.hpp | 6 +++--- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 7bc83f2d4..94470d3d1 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -668,7 +668,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo if( std::is_same< Device, Device_ >::value ) { const auto matrix_view = matrix.getView(); - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; this->forAllRows( f ); @@ -695,7 +695,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixValuesBuffer_view[ bufferIdx ] = value; }; @@ -707,7 +707,7 @@ operator=( const Multidiagonal< Real_, 
Device_, Index_, RowMajorOrder_, RealAllo //// // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; value = thisValuesBuffer_view[ bufferIdx ]; }; diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 96312d03c..224368465 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -216,8 +216,10 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: setValue( const RealType& v ) { + // we dont do this->values = v here because it would set even elements 'outside' the matrix + // method getNumberOfNonzeroElements would not well const RealType newValue = v; - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable { value = newValue; }; this->forAllRows( f ); @@ -419,12 +421,13 @@ forRows( IndexType first, IndexType last, Function& function ) const IndexType diagonalsCount = this->diagonalsShifts.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; + bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) + for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ ) { const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) - 
function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ); + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); -- GitLab From 7cc6eab9d627d5688c551dbfe588c6395d0e7553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 19 Jan 2020 21:38:58 +0100 Subject: [PATCH 106/179] Added DenseMatrixCopyTest. --- src/TNL/Matrices/Dense.hpp | 3 +- src/UnitTests/Matrices/CMakeLists.txt | 24 +- .../Matrices/DenseMatrixCopyTest.cpp | 11 + src/UnitTests/Matrices/DenseMatrixCopyTest.cu | 11 + src/UnitTests/Matrices/DenseMatrixCopyTest.h | 633 ++++++++++++++++++ 5 files changed, 672 insertions(+), 10 deletions(-) create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.cpp create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.cu create mode 100644 src/UnitTests/Matrices/DenseMatrixCopyTest.h diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 21ae1bce6..3d9ff102e 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -130,7 +130,8 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { - this->setDimensions( rowLengths.getSize(), max( rowLengths ) ); + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "" ); + TNL_ASSERT_LE( max( rowLengths ), this->getColumns(), "" ); } template< typename Real, diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 287495405..b19c8b705 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -1,15 +1,18 @@ ADD_SUBDIRECTORY( Legacy ) IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - 
CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -26,18 +29,22 @@ IF( BUILD_CUDA ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( DenseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( 
TridiagonalMatrixTest TridiagonalMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -60,9 +67,8 @@ ELSE( BUILD_CUDA ) ENDIF( BUILD_CUDA ) - -ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp new file mode 100644 index 000000000..5cdd413af --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixCopyTest.cpp - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cu b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu new file mode 100644 index 000000000..5fbd77efa --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixCopyTest.cu - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h new file mode 100644 index 000000000..ef7809a6b --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -0,0 +1,633 @@ +/*************************************************************************** + DenseMatrixCopyTest.h - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, 
TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using Dense_host = TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >; +using Dense_host_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >; +using Dense_cuda = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >; +using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >; + + +#ifdef HAVE_GTEST +#include + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 12 13 | + * | 14 15 16 17 18 | + * | 19 20 | + * | 21 | + * | 22 | + * | 23 24 25 26 27 | + * \ 28 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.setDimensions( rows, cols ); + + int value = 1; + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + m.setElement( 6, 0, value++ ); // 6th row + + m.setElement( 7, 0, value++ ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + m.setElement( 9, 5, value++ ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + 
EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 3 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 5 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 6 ); + EXPECT_EQ( m.getElement( 2, 1 ), 7 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 9 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 11 ); + EXPECT_EQ( m.getElement( 3, 4 ), 12 ); + EXPECT_EQ( m.getElement( 3, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 20 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 21 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); + EXPECT_EQ( m.getElement( 8, 1 ), 24 ); + EXPECT_EQ( m.getElement( 8, 2 ), 25 ); + EXPECT_EQ( m.getElement( 8, 3 ), 26 ); + EXPECT_EQ( m.getElement( 8, 4 ), 27 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + 
EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 28 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 2 1 \ + * | 5 4 3 | + * | 8 7 6 | + * | 11 10 9 | + * | 14 13 12 | + * | 16 15 | + * \ 17 / + */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.setDimensions( rows, cols ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, value++ ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 2 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 3 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 6 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 13 ); + EXPECT_EQ( m.getElement( 4, 2 ), 12 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 
0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 16 ); + EXPECT_EQ( m.getElement( 5, 1 ), 15 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 17 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 | + * | 12 13 14 | + * | 15 16 | + * \ 17 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.setDimensions( rows, cols ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, value++ ); +} + +template< typename Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 3 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 6 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 9 ); + EXPECT_EQ( m.getElement( 3, 3 ), 10 ); + 
EXPECT_EQ( m.getElement( 3, 4 ), 11 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 12 ); + EXPECT_EQ( m.getElement( 4, 4 ), 13 ); + EXPECT_EQ( m.getElement( 4, 5 ), 14 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 15 ); + EXPECT_EQ( m.getElement( 5, 5 ), 16 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 17 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + 
using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( 
IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( j - i ) ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 
6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + +TEST( DenseMatrixCopyTest, Dense_HostToDense_Host ) +{ + testCopyAssignment< Dense_host, Dense_host >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_host >(); + testCopyAssignment< Dense_host, Dense_host_RowMajorOrder >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, Dense_HostToDense_Cuda ) +{ + testCopyAssignment< Dense_host, Dense_cuda >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda >(); + testCopyAssignment< Dense_host, Dense_cuda_RowMajorOrder >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, Dense_CudaToDense_Host ) +{ + testCopyAssignment< Dense_cuda, Dense_host >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host >(); + testCopyAssignment< Dense_cuda, Dense_host_RowMajorOrder >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, Dense_CudaToDense_Cuda ) +{ + testCopyAssignment< Dense_cuda, Dense_cuda >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda >(); + testCopyAssignment< Dense_cuda, Dense_cuda_RowMajorOrder >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda_RowMajorOrder >(); +} +#endif // 
HAVE_CUDA + + +TEST( DenseMatrixCopyTest, CSR_HostToDense_Host ) +{ + testCopyAssignment< CSR_host, Dense_host >(); + testCopyAssignment< CSR_host, Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, CSR_HostToDense_cuda ) +{ + testCopyAssignment< CSR_host, Dense_cuda >(); + testCopyAssignment< CSR_host, Dense_cuda_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, CSR_CudaToDense_host ) +{ + testCopyAssignment< CSR_cuda, Dense_host >(); + testCopyAssignment< CSR_cuda, Dense_host_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, CSR_CudaToDense_cuda ) +{ + testCopyAssignment< CSR_cuda, Dense_cuda >(); + testCopyAssignment< CSR_cuda, Dense_cuda_RowMajorOrder >(); +} +#endif + +//// +// Tridiagonal matrix assignment test +TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_host ) +{ + tridiagonalMatrixAssignment< Dense_host >(); + tridiagonalMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_cuda ) +{ + tridiagonalMatrixAssignment< Dense_cuda >(); + tridiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +//// +// Multidiagonal matrix assignment test +TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_host ) +{ + multidiagonalMatrixAssignment< Dense_host >(); + multidiagonalMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_cuda ) +{ + multidiagonalMatrixAssignment< Dense_cuda >(); + multidiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +//// +// Dense matrix assignment test +TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_host ) +{ + denseMatrixAssignment< Dense_host >(); + denseMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_cuda ) +{ + denseMatrixAssignment< Dense_cuda >(); + 
denseMatrixAssignment< Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST + +#include "../main.h" -- GitLab From bb234945233bf2f929440f62e3d9643c61ad8d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 21 Jan 2020 22:17:46 +0100 Subject: [PATCH 107/179] Implementing dense matrix assignment. --- src/TNL/Matrices/Dense.h | 27 ++- src/TNL/Matrices/Dense.hpp | 220 ++++++++++++------- src/TNL/Matrices/Multidiagonal.h | 3 + src/TNL/Matrices/Multidiagonal.hpp | 14 ++ src/TNL/Matrices/MultidiagonalMatrixView.h | 3 + src/TNL/Matrices/MultidiagonalMatrixView.hpp | 16 +- src/TNL/Matrices/Tridiagonal.h | 3 + src/TNL/Matrices/Tridiagonal.hpp | 16 +- src/TNL/Matrices/TridiagonalMatrixView.h | 3 + src/TNL/Matrices/TridiagonalMatrixView.hpp | 16 +- src/UnitTests/Matrices/DenseMatrixCopyTest.h | 52 +++-- 11 files changed, 276 insertions(+), 97 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 2e71316e9..8c109ac1e 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index > Vector2& x, const RealType& omega = 1.0 ) const; - // copy assignment + /** + * \brief Assignment operator for exactly the same type of the dense matrix. + * + * @param matrix + * @return + */ Dense& operator=( const Dense& matrix ); - // cross-device copy assignment - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > - Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); + /** + * \brief Assignment operator for other dense matrices. 
+ * + * @param matrix + * @return + */ + template< typename RHSReal, typename RHSDevice, typename RHSIndex, + bool RHSRowMajorOrder, typename RHSRealAllocator > + Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ); + + /** + * \brief Assignment operator for other (sparse) types of matrices. + * @param matrix + * @return + */ + template< typename RHSMatrix > + Dense& operator=( const RHSMatrix& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 3d9ff102e..e1acfee67 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -118,7 +118,7 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: setLike( const Matrix_& matrix ) { - Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); } template< typename Real, @@ -896,39 +896,81 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat { setLike( matrix ); this->values = matrix.values; - /*const IndexType bufferRowsCount( 128 ); - const IndexType columns = this->getColumns(); - const size_t bufferSize = bufferRowsCount * columns; - Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize ); - Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); - auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); - auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); - - IndexType baseRow( 0 ); - const IndexType rowsCount = this->getRows(); - while( baseRow < rowsCount ) + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< 
typename RHSReal, typename RHSDevice, typename RHSIndex, + bool RHSRowMajorOrder, typename RHSRealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ) +{ + using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + + this->setLike( matrix ); + if( RowMajorOrder == RHSRowMajorOrder ) { - const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + this->values = matrix.values; + return *this; + } - //// - // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; - sourceValuesBuffer_view[ bufferIdx ] = value; - }; - matrix.forRows( baseRow, lastRow, f1 ); - destinationValuesBuffer = sourceValuesBuffer; - - //// - // Copy buffer to this matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; - value = destinationValuesBuffer_view[ bufferIdx ]; + auto this_view = this->view; + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + this_view( rowIdx, columnIdx ) = value; }; - this->forRows( baseRow, lastRow, f2 ); - baseRow += bufferRowsCount; + matrix.forAllRows( f ); } - return *this;*/ + else + { + const IndexType maxRowLength = matrix.getColumns(); + const 
IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix. 
+ auto this_view = this->view; + auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable { + IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx; + this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; + }; + Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); + baseRow += bufferRowsCount; + } + } + return *this; } template< typename Real, @@ -936,59 +978,85 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename RHSMatrix > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) +operator=( const RHSMatrix& matrix ) { - this->setLike( matrix ); - if( RowMajorOrder == RowMajorOrder_ ) - this->values = matrix.getValues(); + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + auto values_view = this->values.getView(); + RHSIndexType padding_index = matrix.getPaddingIndex(); + this->values = 0.0; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, 
RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + if( value != 0.0 && columnIdx != padding_index ) + values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value; + }; + matrix.forAllRows( f ); + } else { - if( std::is_same< DeviceType, Device_ >::value ) + const IndexType maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) { - auto this_view = this->getView(); - auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { - this_view.getRow( rowIdx ).setElement( columnIdx, value ); + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = padding_index; + matrixColumnsBuffer_view = padding_index; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + if( columnIndex != padding_index ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixColumnsBuffer_view[ bufferIdx ] = 
columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; + } }; - matrix.forAllRows( f ); - } - else - { - const IndexType bufferRowsCount( 128 ); - const IndexType columns = this->getColumns(); - const size_t bufferSize = bufferRowsCount * columns; - Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize ); - Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize ); - auto sourceValuesBuffer_view = sourceValuesBuffer.getView(); - auto destinationValuesBuffer_view = destinationValuesBuffer.getView(); - - IndexType baseRow( 0 ); - const IndexType rowsCount = this->getRows(); - while( baseRow < rowsCount ) - { - const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); - - //// - // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; - sourceValuesBuffer_view[ bufferIdx ] = value; - }; - matrix.forRows( baseRow, lastRow, f1 ); - - destinationValuesBuffer = sourceValuesBuffer; - - //// - // Copy buffer to this matrix - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable { - const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx; - value = destinationValuesBuffer_view[ bufferIdx ]; - }; - this->forRows( baseRow, lastRow, f2 ); - baseRow += bufferRowsCount; - } + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto this_view = this->view; + auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable { + IndexType 
bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx; + IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ]; + if( columnIdx != padding_index ) + this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; + }; + Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); + baseRow += bufferRowsCount; } } this->view = this->getView(); diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 9e5f92295..927e52449 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexerType& getIndexer(); + __cuda_callable__ + IndexType getPaddingIndex() const; + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 94470d3d1..5d83004f2 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const return this->indexer.getGlobalIndex( row, localIdx ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getPaddingIndex() const +{ + return this->view.getPaddingIndex(); +} + /* template<> class MultidiagonalDeviceDependentCode< Devices::Host > diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index 1e5a9bd28..f623a3ca6 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ IndexerType& 
getIndexer(); + __cuda_callable__ + IndexType getPaddingIndex() const; + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 224368465..f35c6d713 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -713,11 +713,25 @@ template< typename Real, typename Index, bool RowMajorOrder > __cuda_callable__ -Index MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getElementIndex( const IndexType row, const IndexType localIdx ) const { return this->indexer.getGlobalIndex( row, localIdx ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getPaddingIndex() const +{ + return -1; +} + + } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 82549e744..3f8902310 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -174,6 +174,9 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexerType& getIndexer(); + __cuda_callable__ + IndexType getPaddingIndex() const; + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 8f4f4e190..d99715a47 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -678,7 +678,8 @@ template< typename Real, bool RowMajorOrder, typename RealAllocator > __cuda_callable__ -Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElementIndex( const IndexType row, const IndexType column ) const { IndexType localIdx = column - row; @@ -691,6 +692,19 @@ getElementIndex( const IndexType row, const 
IndexType column ) const return this->indexer.getGlobalIndex( row, localIdx ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getPaddingIndex() const +{ + return this->view.getPaddingIndex(); +} + /* template<> class TridiagonalDeviceDependentCode< Devices::Host > diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 61b005c5a..82b76c73f 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -151,6 +151,9 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ IndexerType& getIndexer(); + __cuda_callable__ + IndexType getPaddingIndex() const; + protected: __cuda_callable__ diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 7fc5fd6b7..6e293ffd0 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -675,7 +675,8 @@ template< typename Real, typename Index, bool RowMajorOrder > __cuda_callable__ -Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: getElementIndex( const IndexType row, const IndexType column ) const { IndexType localIdx = column - row; @@ -688,5 +689,18 @@ getElementIndex( const IndexType row, const IndexType column ) const return this->indexer.getGlobalIndex( row, localIdx ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getPaddingIndex() const +{ + return -1; +} + + } // namespace Matrices } // namespace TNL diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h index ef7809a6b..3ef31f107 
100644 --- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -59,9 +59,20 @@ using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda, template< typename Matrix > void setupUnevenRowSizeMatrix( Matrix& m ) { - const int rows = 10; - const int cols = 6; - m.setDimensions( rows, cols ); + const int rows = 10; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); + rowLengths.setElement( 7, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); int value = 1; for( int i = 0; i < cols - 4; i++ ) // 0th row @@ -183,15 +194,21 @@ void checkUnevenRowSizeMatrix( Matrix& m ) template< typename Matrix > void setupAntiTriDiagMatrix( Matrix& m ) { - const int rows = 7; - const int cols = 6; - m.setDimensions( rows, cols ); + const int rows = 7; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); - int value = 1; - for( int i = 0; i < rows; i++ ) - for( int j = cols - 1; j > 2; j-- ) - if( j - i + 1 < cols && j - i + 1 >= 0 ) - m.setElement( i, j - i + 1, value++ ); + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, value++ ); } template< typename Matrix > @@ -267,6 +284,13 @@ void setupTriDiagMatrix( Matrix& m ) const int rows = 7; const int cols = 6; m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + 
rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + int value = 1; for( int i = 0; i < rows; i++ ) @@ -387,7 +411,7 @@ void tridiagonalMatrixAssignment() Matrix matrix; matrix = hostMatrix; - using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; @@ -439,7 +463,7 @@ void multidiagonalMatrixAssignment() Matrix matrix; matrix = hostMatrix; - using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; @@ -488,7 +512,7 @@ void denseMatrixAssignment() Matrix matrix; matrix = hostMatrix; - using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; -- GitLab From 78c47bd249e66866baf369915fb4de94319bedf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 21 Jan 2020 22:18:45 +0100 Subject: [PATCH 108/179] Debugging sparse matrix assignment in CUDA. 
--- src/TNL/Containers/Segments/CSRView.hpp | 1 + src/TNL/Containers/Segments/SegmentView.h | 10 +++++++++- src/TNL/Matrices/SparseMatrix.hpp | 16 +++++++++------- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 1 - 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index cc4d16fe6..d8ea9b06e 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -156,6 +156,7 @@ auto CSRView< Device, Index >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { + printf( "----> size %d \n", offsets[ segmentIdx + 1 ] ); return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h index 29f2e7781..edfe31df4 100644 --- a/src/TNL/Containers/Segments/SegmentView.h +++ b/src/TNL/Containers/Segments/SegmentView.h @@ -29,8 +29,16 @@ class SegmentView< Index, false > SegmentView( const IndexType offset, const IndexType size, const IndexType step ) - : segmentOffset( offset ), segmentSize( size ), step( step ){}; + : segmentOffset( offset ), segmentSize( size ), step( step ) + { + printf( "--- size = %d \n", size ); + }; + __cuda_callable__ + SegmentView( const SegmentView& view ) + : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ) + { + }; __cuda_callable__ IndexType getSize() const { diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 66813a732..447d8d250 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -673,7 +673,6 @@ operator=( const RHSMatrix& matrix ) using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; - using RHSIndexAllocatorType = typename 
Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >; Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); @@ -712,9 +711,9 @@ operator=( const RHSMatrix& matrix ) const IndexType bufferRowsCount( 128 ); const size_t bufferSize = bufferRowsCount * maxRowLength; Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); - Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); - Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); @@ -754,12 +753,15 @@ operator=( const RHSMatrix& matrix ) RealType inValue( 0.0 ); IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] ); auto matrixRow = matrix_view.getRow( rowIdx ); - while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns ) + IndexType s = matrixRow.getSize(); + //printf( " row %d size %d \n", rowIdx, s ); + while( inValue == 0.0 && localIdx < 0 ) { bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++; - inValue = thisValuesBuffer_view[ bufferIdx ]; + TNL_ASSERT_LT( bufferIdx, bufferSize, "" ); + //inValue = thisValuesBuffer_view[ bufferIdx ]; } - rowLocalIndexes_view[ rowIdx ] = localIdx; + /*rowLocalIndexes_view[ rowIdx ] = localIdx; if( inValue == 0.0 ) { columnIndex = paddingIndex; @@ -769,7 +771,7 @@ operator=( const RHSMatrix& matrix ) { 
columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; value = inValue; - } + }*/ }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 6e1427ad1..46777f6c0 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -64,7 +64,6 @@ void setupUnevenRowSizeMatrix( Matrix& m ) { const int rows = 10; const int cols = 6; - m.reset(); m.setDimensions( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); -- GitLab From 5be2891b58b375441e8785b281b4a603e25852bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 22 Jan 2020 21:02:00 +0100 Subject: [PATCH 109/179] Fixing segments. --- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/CSRView.hpp | 4 ++-- src/TNL/Containers/Segments/SegmentView.h | 10 +++------- src/UnitTests/Containers/Segments/SegmentsTest.hpp | 12 ++++++------ 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 3581748fa..9a05d84f7 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -207,7 +207,7 @@ void CSR< Device, Index, IndexAllocator >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... 
); } template< typename Device, diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index d8ea9b06e..d6ec55b6a 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -156,7 +156,7 @@ auto CSRView< Device, Index >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { - printf( "----> size %d \n", offsets[ segmentIdx + 1 ] ); + printf( "----> segmentIdx %d offset %d size %d ptr %p \n", segmentIdx, offsets[ segmentIdx ], offsets.getSize(), offsets.getData() ); return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } @@ -186,7 +186,7 @@ void CSRView< Device, Index >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... ); } template< typename Device, diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h index edfe31df4..eeb3f9d24 100644 --- a/src/TNL/Containers/Segments/SegmentView.h +++ b/src/TNL/Containers/Segments/SegmentView.h @@ -29,16 +29,12 @@ class SegmentView< Index, false > SegmentView( const IndexType offset, const IndexType size, const IndexType step ) - : segmentOffset( offset ), segmentSize( size ), step( step ) - { - printf( "--- size = %d \n", size ); - }; + : segmentOffset( offset ), segmentSize( size ), step( step ){}; __cuda_callable__ SegmentView( const SegmentView& view ) - : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ) - { - }; + : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){}; + __cuda_callable__ IndexType getSize() const { diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 8320fafe5..590b39881 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ 
b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -127,17 +127,17 @@ void test_AllReduction_MaximumInSegments() TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() ); - IndexType k( 1 ); + /*IndexType k( 1 ); for( IndexType i = 0; i < segmentsCount; i++ ) for( IndexType j = 0; j < segmentSize; j++ ) - v.setElement( segments.getGlobalIndex( i, j ), k++ ); - /*auto view = v.getView(); - auto init = [=] __cuda_callable__ ( const IndexType i, const IndexType j ) mutable -> bool { - view[ j ] = j + 1; + v.setElement( segments.getGlobalIndex( i, j ), k++ );*/ + auto view = v.getView(); + auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool { + view[ globalIdx ] = segmentIdx * 5 + localIdx + 1; return true; }; segments.forAll( init ); - std::cerr << v << std::endl;*/ + std::cerr << v << std::endl; TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); -- GitLab From 64c2d435eeacb92fb39066e0273d7d66082f26cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 23 Jan 2020 21:32:56 +0100 Subject: [PATCH 110/179] Fixed sparse matrix assignment. 
--- src/TNL/Containers/Segments/CSR.hpp | 1 + src/TNL/Containers/Segments/CSRView.h | 2 ++ src/TNL/Containers/Segments/CSRView.hpp | 6 ++-- src/TNL/Containers/Segments/Ellpack.hpp | 3 +- src/TNL/Containers/Segments/EllpackView.h | 6 ++-- src/TNL/Containers/Segments/EllpackView.hpp | 9 +++-- src/TNL/Containers/Segments/SlicedEllpack.hpp | 1 + .../Containers/Segments/SlicedEllpackView.h | 2 ++ .../Containers/Segments/SlicedEllpackView.hpp | 3 ++ src/TNL/Matrices/Dense.hpp | 1 - src/TNL/Matrices/DenseMatrixView.hpp | 1 + src/TNL/Matrices/MatrixView.hpp | 1 + src/TNL/Matrices/Multidiagonal.h | 2 ++ src/TNL/Matrices/Multidiagonal.hpp | 3 +- src/TNL/Matrices/MultidiagonalMatrixView.h | 2 ++ src/TNL/Matrices/MultidiagonalMatrixView.hpp | 1 + src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 34 ++++++++++--------- src/TNL/Matrices/SparseMatrixView.hpp | 3 +- src/TNL/Matrices/Tridiagonal.h | 2 ++ src/TNL/Matrices/Tridiagonal.hpp | 1 + .../Containers/Segments/SegmentsTest.hpp | 5 --- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 5 +++ 23 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 9a05d84f7..55dcba74c 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -253,6 +253,7 @@ CSR< Device, Index, IndexAllocator >:: operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) { this->offsets = source.offsets; + return *this; } template< typename Device, diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 2ad849f97..759fe8ff7 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -52,8 +52,10 @@ class CSRView static String getSerializationType(); + __cuda_callable__ ViewType getView(); + __cuda_callable__ ConstViewType getConstView() const; /** diff --git a/src/TNL/Containers/Segments/CSRView.hpp 
b/src/TNL/Containers/Segments/CSRView.hpp index d6ec55b6a..043e06e04 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -66,6 +66,7 @@ getSerializationType() template< typename Device, typename Index > +__cuda_callable__ typename CSRView< Device, Index >::ViewType CSRView< Device, Index >:: getView() @@ -75,6 +76,7 @@ getView() template< typename Device, typename Index > +__cuda_callable__ typename CSRView< Device, Index >::ConstViewType CSRView< Device, Index >:: getConstView() const @@ -156,7 +158,6 @@ auto CSRView< Device, Index >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { - printf( "----> segmentIdx %d offset %d size %d ptr %p \n", segmentIdx, offsets[ segmentIdx ], offsets.getSize(), offsets.getData() ); return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } @@ -167,7 +168,7 @@ void CSRView< Device, Index >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { - const auto offsetsView = this->offsets.getConstView(); + const auto offsetsView = this->offsets; auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { const IndexType begin = offsetsView[ segmentIdx ]; const IndexType end = offsetsView[ segmentIdx + 1 ]; @@ -228,6 +229,7 @@ CSRView< Device, Index >:: operator=( const CSRView& view ) { this->offsets.copy( view.offsets ); + return *this; } template< typename Device, diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 8763c2e5d..663a65bc8 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -293,7 +293,7 @@ void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... 
); } template< typename Device, @@ -364,6 +364,7 @@ operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alig this->segmentSize = source.segmentSize; this->size = source.size; this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); + return *this; } template< typename Device, diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index f64b04068..3870f0802 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -37,7 +37,7 @@ class EllpackView template< typename Device_, typename Index_ > using ViewTemplate = EllpackView< Device_, Index_ >; using ViewType = EllpackView; - //using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; + using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; __cuda_callable__ @@ -54,9 +54,11 @@ class EllpackView static String getSerializationType(); + __cuda_callable__ ViewType getView(); - //ConstViewType getConstView() const; + __cuda_callable__ + ConstViewType getConstView() const; /** * \brief Number segments. 
diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index c0d0b3721..ea2dc0d21 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -78,6 +78,7 @@ template< typename Device, typename Index, bool RowMajorOrder, int Alignment > +__cuda_callable__ typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType EllpackView< Device, Index, RowMajorOrder, Alignment >:: getView() @@ -85,16 +86,17 @@ getView() return ViewType( segmentSize, size, alignedSize ); } -/*template< typename Device, +template< typename Device, typename Index, bool RowMajorOrder, int Alignment > +__cuda_callable__ typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType EllpackView< Device, Index, RowMajorOrder, Alignment >:: getConstView() const { return ConstViewType( segmentSize, size, alignedSize ); -}*/ +} template< typename Device, typename Index, @@ -233,7 +235,7 @@ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { - this->forSegments( 0, this->getSize(), f, args... ); + this->forSegments( 0, this->getSegmentsCount(), f, args... 
); } template< typename Device, @@ -302,6 +304,7 @@ operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view ) this->segmentSize = view.segmentSize; this->size = view.size; this->alignedSize = view.alignedSize; + return *this; } template< typename Device, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 62e2ca7d5..3d3a6d8c3 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -408,6 +408,7 @@ operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ this->segmentsCount = source.segmentsCount; this->sliceOffsets = source.sliceOffsets; this->sliceSegmentSizes = source.sliceSegmentSizes; + return *this; } template< typename Device, diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index c8c73c3f2..2b310a805 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -56,8 +56,10 @@ class SlicedEllpackView static String getSerializationType(); + __cuda_callable__ ViewType getView(); + __cuda_callable__ ConstViewType getConstView() const; __cuda_callable__ diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 98a3d9b81..3e3c8c09c 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -89,6 +89,7 @@ template< typename Device, typename Index, bool RowMajorOrder, int SliceSize > +__cuda_callable__ typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: getView() @@ -100,6 +101,7 @@ template< typename Device, typename Index, bool RowMajorOrder, int SliceSize > +__cuda_callable__ typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType 
SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: getConstView() const @@ -357,6 +359,7 @@ operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& v this->segmentsCount = view.segmentsCount; this->sliceOffsets.copy( view.sliceOffsets ); this->sliceSegmentSizes.copy( view.sliceSegmentSizes ); + return *this; } template< typename Device, diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index e1acfee67..91a98e7f9 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -925,7 +925,6 @@ operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealA auto this_view = this->view; if( std::is_same< DeviceType, RHSDeviceType >::value ) { - const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { this_view( rowIdx, columnIdx ) = value; }; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 890606436..a11ff263c 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -901,6 +901,7 @@ operator=( const DenseMatrixView& matrix ) { MatrixView< Real, Device, Index >::operator=( matrix ); this->segments = matrix.segments; + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 275a22870..360478d05 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -131,6 +131,7 @@ operator=( const MatrixView& view ) rows = view.rows; columns = view.columns; values.copy( view.values ); + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 927e52449..749ddfae7 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -113,8 +113,10 @@ class Multidiagonal : public Matrix< 
Real, Device, Index, RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 5d83004f2..659d6d4eb 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -683,9 +683,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); - auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); - auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); @@ -716,6 +714,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo } } } + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index f623a3ca6..97ff94f85 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -87,8 +87,10 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + 
__cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index f35c6d713..33010cebc 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -338,6 +338,7 @@ operator=( const MultidiagonalMatrixView& view ) this->diagonalsShifts.copy( view.diagonalsShifts ); this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts ); this->indexer = view.indexer; + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 26d5d2d84..9f91ee7d1 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -93,7 +93,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void getCompressedRowLengths( Vector& rowLengths ) const; [[deprecated]] - virtual IndexType getRowLength( const IndexType row ) const {}; + virtual IndexType getRowLength( const IndexType row ) const { return 0;}; template< typename Matrix > void setLike( const Matrix& matrix ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 447d8d250..cf4472922 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -210,7 +210,7 @@ Index SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getNumberOfNonzeroMatrixElements() const { - this->view.getNumberOfNonzeroMatrixElements(); + return this->view.getNumberOfNonzeroMatrixElements(); } template< typename Real, @@ -602,7 +602,6 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); - auto 
thisColumnsBuffer_view = thisColumnsBuffer.getView(); IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); @@ -689,11 +688,10 @@ operator=( const RHSMatrix& matrix ) auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; - if( std::is_same< DeviceType, RHSDeviceType >::value ) + /*if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { - RealType inValue( 0.0 ); IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); if( value != 0.0 && columnIndex != paddingIndex ) { @@ -705,7 +703,7 @@ operator=( const RHSMatrix& matrix ) }; matrix.forAllRows( f ); } - else + else*/ { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); @@ -714,10 +712,13 @@ operator=( const RHSMatrix& matrix ) Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisRowLengths; + thisRowLengths = rowLengths; auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); auto thisValuesBuffer_view = thisValuesBuffer.getView(); auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + matrixValuesBuffer_view = 0.0; IndexType baseRow( 0 ); const IndexType rowsCount = this->getRows(); @@ -735,6 +736,7 @@ operator=( const RHSMatrix& matrix ) const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; matrixValuesBuffer_view[ bufferIdx ] = value; + //std::cerr << " <<<<< rowIdx = " << rowIdx << " 
localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl; } }; matrix.forRows( baseRow, lastRow, f1 ); @@ -748,20 +750,20 @@ operator=( const RHSMatrix& matrix ) // Copy matrix elements from the buffer to the matrix and ignoring // zero matrix elements const IndexType matrix_columns = this->getColumns(); - auto matrix_view = matrix.getView(); - auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable { + const auto thisRowLengths_view = thisRowLengths.getConstView(); + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); - IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] ); - auto matrixRow = matrix_view.getRow( rowIdx ); - IndexType s = matrixRow.getSize(); - //printf( " row %d size %d \n", rowIdx, s ); - while( inValue == 0.0 && localIdx < 0 ) + size_t bufferIdx; + IndexType bufferLocalIdx( rowLocalIndexes_view[ rowIdx ] ); + while( inValue == 0.0 && localIdx < thisRowLengths_view[ rowIdx ] ) { - bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++; + bufferIdx = ( rowIdx - baseRow ) * maxRowLength + bufferLocalIdx++; TNL_ASSERT_LT( bufferIdx, bufferSize, "" ); - //inValue = thisValuesBuffer_view[ bufferIdx ]; + inValue = thisValuesBuffer_view[ bufferIdx ]; } - /*rowLocalIndexes_view[ rowIdx ] = localIdx; + //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx + // << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl; + rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx; if( inValue == 0.0 ) { columnIndex = paddingIndex; @@ -771,7 +773,7 @@ operator=( const RHSMatrix& matrix ) { columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; value = inValue; - }*/ + } }; this->forRows( baseRow, lastRow, f2 ); baseRow += bufferRowsCount; diff --git 
a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 965a51b8b..4ac0a29b8 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -139,7 +139,7 @@ Index SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: getRowLength( const IndexType row ) const { - + return 0; } template< typename Real, @@ -525,6 +525,7 @@ operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView MatrixView< Real, Device, Index >::operator=( matrix ); this->columnIndexes.copy( matrix.columnIndexes ); this->segments = matrix.segments; + return *this; } template< typename Real, diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 3f8902310..6f0c6a548 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -91,8 +91,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + __cuda_callable__ RowView getRow( const IndexType& rowIdx ); + __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index d99715a47..2ccdc4838 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -592,6 +592,7 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca this->forAllRows( f ); } } + return *this; } template< typename Real, diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp index 590b39881..6d4692dbe 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest.hpp +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -127,17 +127,12 @@ void 
test_AllReduction_MaximumInSegments() TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() ); - /*IndexType k( 1 ); - for( IndexType i = 0; i < segmentsCount; i++ ) - for( IndexType j = 0; j < segmentSize; j++ ) - v.setElement( segments.getGlobalIndex( i, j ), k++ );*/ auto view = v.getView(); auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool { view[ globalIdx ] = segmentIdx * 5 + localIdx + 1; return true; }; segments.forAll( init ); - std::cerr << v << std::endl; TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 46777f6c0..053f1e9fb 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -458,6 +458,7 @@ void tridiagonalMatrixAssignment() RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) @@ -510,6 +511,10 @@ void multidiagonalMatrixAssignment() RowCapacitiesType rowCapacities; matrix.getCompressedRowLengths( rowCapacities ); RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + /*std::cerr << "hostMatrix " << hostMatrix << std::endl; + std::cerr << "matrix " << matrix << std::endl; + std::cerr << "rowCapacities " << rowCapacities << std::endl;*/ + EXPECT_EQ( rowCapacities, exactRowLengths ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) -- GitLab From 9ff08661cf33881fd1b7ceda79ddeae9ff43a4ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 23 Jan 2020 22:21:56 +0100 Subject: [PATCH 111/179] Fixing matrix reader - it can throw exceptions. 
--- src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 9c58d25b0..ea39d80b7 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -365,8 +365,7 @@ struct LinearSolversBenchmark // load the matrix if( ends_with( file_matrix, ".mtx" ) ) { Matrices::MatrixReader< MatrixType > reader; - if( ! reader.readMtxFile( file_matrix, *matrixPointer ) ) - return false; + reader.readMtxFile( file_matrix, *matrixPointer ); } else { matrixPointer->load( file_matrix ); -- GitLab From 488faa318e66ee3667966ca2e72a9a2a1738bea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 23 Jan 2020 23:05:26 +0100 Subject: [PATCH 112/179] Deleting unused variables. --- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 1 - src/TNL/Matrices/TridiagonalMatrixView.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 33010cebc..2bd5392df 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -485,7 +485,6 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); - const auto valuesView = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { return value * inVectorView[ column ]; }; diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 6e293ffd0..99e3e87d4 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -449,7 +449,6 @@ 
vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); - const auto valuesView = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { return value * inVectorView[ column ]; }; -- GitLab From 20459748647069bf6fb65140eaa3ec4f86fe5f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 23 Jan 2020 23:05:54 +0100 Subject: [PATCH 113/179] Uncommenting sparse matrix assignment code for the same devices. --- src/TNL/Matrices/SparseMatrix.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index cf4472922..d3641e030 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -688,7 +688,7 @@ operator=( const RHSMatrix& matrix ) auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; - /*if( std::is_same< DeviceType, RHSDeviceType >::value ) + if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { @@ -703,7 +703,7 @@ operator=( const RHSMatrix& matrix ) }; matrix.forAllRows( f ); } - else*/ + else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); -- GitLab From 178431c56823ef1dc517db51b4da6f402e0346e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 26 Jan 2020 18:06:13 +0100 Subject: [PATCH 114/179] Fix of Array documentation. 
--- src/TNL/Containers/ArrayView.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index b4e063b7e..05e3e9791 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -238,7 +238,7 @@ public: ArrayView& operator=( const T& array ); /** - * \brief Shallow copy of the array view + * \brief Makes shallow copy of the array view. * * \param view Reference to the source array view. * \return Reference to this array view. -- GitLab From e5020801922e854f2901cbdbfb6235b74fcf1e53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 26 Jan 2020 18:06:22 +0100 Subject: [PATCH 115/179] Added check against cross device memory acces to Array and ArrayView. --- src/TNL/Algorithms/CudaScanKernel.h | 2 +- src/TNL/Algorithms/Scan.hpp | 10 +++++----- src/TNL/Containers/Array.h | 10 ++++++++-- src/TNL/Containers/Array.hpp | 12 ++++++++++++ src/TNL/Containers/ArrayView.h | 10 ++++++++-- src/TNL/Containers/ArrayView.hpp | 12 ++++++++++++ src/UnitTests/Containers/DistributedArrayTest.h | 2 +- 7 files changed, 47 insertions(+), 11 deletions(-) diff --git a/src/TNL/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h index 79a201959..97912b234 100644 --- a/src/TNL/Algorithms/CudaScanKernel.h +++ b/src/TNL/Algorithms/CudaScanKernel.h @@ -277,7 +277,7 @@ struct CudaScanKernelLauncher elementsInBlock, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ], - &blockSums[ gridIdx * maxGridSize() ] ); + &blockSums.getData()[ gridIdx * maxGridSize() ] ); } // synchronize the null-stream after all grids diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp index 7b6d31ece..fc1f2f1e5 100644 --- a/src/TNL/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -225,8 +225,8 @@ perform( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::perform( end - begin, - &v[ begin ], // input - &v[ begin ], // output + &v.getData()[ 
begin ], // input + &v.getData()[ begin ], // output reduction, zero ); #else @@ -251,8 +251,8 @@ performFirstPhase( Vector& v, return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase( end - begin, - &v[ begin ], // input - &v[ begin ], // output + &v.getData()[ begin ], // input + &v.getData()[ begin ], // output reduction, zero ); #else @@ -279,7 +279,7 @@ performSecondPhase( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase( end - begin, - &v[ begin ], // output + &v.getData()[ begin ], // output blockShifts.getData(), reduction, shift ); diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index 117cb32ae..a9705e66f 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -446,7 +446,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this methods performs runtime checks for cross-device memory + * accesses which lead to segmentation fault. If you need to do just a + * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. @@ -460,7 +463,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this methods performs runtime checks for cross-device memory + * accesses which lead to segmentation fault. If you need to do just a + * pointer arithmetics use \e getData instead. 
* * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 24e3f8b43..2a60986f5 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -509,6 +509,12 @@ Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; @@ -523,6 +529,12 @@ const Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { +#ifdef __CUDA_ARCH__ + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 05e3e9791..af54aef8a 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -347,7 +347,10 @@ public: * to the memory space where the data was allocated. 
For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this methods performs runtime checks for cross-device memory + * accesses which lead to segmentation fault. If you need to do just a + * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. @@ -362,7 +365,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this methods performs runtime checks for cross-device memory + * accesses which lead to segmentation fault. If you need to do just a + * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 4ef8ac3f6..81e143ac2 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -252,6 +252,12 @@ __cuda_callable__ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." 
); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return data[ i ]; @@ -265,6 +271,12 @@ const Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { +#ifdef __CUDA_ARCH__ + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return data[ i ]; diff --git a/src/UnitTests/Containers/DistributedArrayTest.h b/src/UnitTests/Containers/DistributedArrayTest.h index f4bd35830..204bc6fe7 100644 --- a/src/UnitTests/Containers/DistributedArrayTest.h +++ b/src/UnitTests/Containers/DistributedArrayTest.h @@ -94,7 +94,7 @@ TYPED_TEST( DistributedArrayTest, copyFromGlobal ) ArrayViewType localArrayView = this->distributedArray.getLocalView(); auto globalView = globalArray.getConstView(); const auto localRange = this->distributedArray.getLocalRange(); - globalView.bind( &globalArray[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() ); + globalView.bind( &globalArray.getData()[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() ); EXPECT_EQ( localArrayView, globalView ); } -- GitLab From 335a61d58a18fbf8133bdaedc167c3a042e2dd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 27 Jan 2020 18:41:29 +0100 Subject: [PATCH 116/179] Fixed distributed matrix. 
--- src/TNL/Matrices/DistributedSpMV.h | 2 +- src/TNL/Matrices/ThreePartVector.h | 22 ++++++++++++++++++++++ src/UnitTests/Matrices/SparseMatrixTest.h | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index 8460ded4d..a4711dce0 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -176,7 +176,7 @@ public: continue; if( commPatternStarts( rank, j ) < commPatternEnds( rank, j ) ) commRequests.push_back( CommunicatorType::IRecv( - &globalBuffer[ commPatternStarts( rank, j ) ], + globalBuffer.getPointer( commPatternStarts( rank, j ) ), commPatternEnds( rank, j ) - commPatternStarts( rank, j ), j, 0, group ) ); } diff --git a/src/TNL/Matrices/ThreePartVector.h b/src/TNL/Matrices/ThreePartVector.h index f57e3e116..f28f544f5 100644 --- a/src/TNL/Matrices/ThreePartVector.h +++ b/src/TNL/Matrices/ThreePartVector.h @@ -75,6 +75,17 @@ public: return right[ i - left.getSize() - middle.getSize() ]; } + __cuda_callable__ + const Real* getPointer( Index i ) const + { + if( i < left.getSize() ) + return &left.getData()[ i ]; + else if( i < left.getSize() + middle.getSize() ) + return &middle.getData()[ i - left.getSize() ]; + else + return &right.getData()[ i - left.getSize() - middle.getSize() ]; + } + friend std::ostream& operator<<( std::ostream& str, const ThreePartVectorView& v ) { str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]"; @@ -143,6 +154,17 @@ public: return right[ i - left.getSize() - middle.getSize() ]; } + __cuda_callable__ + const Real* getPointer( Index i ) const + { + if( i < left.getSize() ) + return &left.getData()[ i ]; + else if( i < left.getSize() + middle.getSize() ) + return &middle.getData()[ i - left.getSize() ]; + else + return &right.getData()[ i - left.getSize() - middle.getSize() ]; + } + friend std::ostream& operator<<( std::ostream& str, const ThreePartVector& v ) { str 
<< "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]"; diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 8b1d57566..b08d66c33 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -24,7 +24,7 @@ using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host ) { - test_PerformSORIteration< CSR_host_float >(); + //test_PerformSORIteration< CSR_host_float >(); } #ifdef HAVE_CUDA -- GitLab From ae650de9553307195fecd2eb1ec35d160b99110b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 27 Jan 2020 20:56:01 +0100 Subject: [PATCH 117/179] Added sparse matrix values holder. --- src/TNL/Matrices/details/ValuesHolder.h | 78 +++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/TNL/Matrices/details/ValuesHolder.h diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h new file mode 100644 index 000000000..e54ec8026 --- /dev/null +++ b/src/TNL/Matrices/details/ValuesHolder.h @@ -0,0 +1,78 @@ +/*************************************************************************** + ValuesHolder.h - description + ------------------- + begin : Jan 27, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +class ValuesHolder +: public Containers::Vector< Real, Device, Index, RealAllocator > +{}; + +template< typename Device, + typename Index, + typename RealAllocator > +class ValuesHolder< bool, Device, Index, RealAllocator > +{ + public: + + using RealType = 
bool; + using DeviceType = Device; + using IndexType = Index; + + ValuesHolder() + : size( 0 ){}; + + ValuesdHolder( const IndexType& size ) + : size( size ){}; + + void setSize( const IndexType& size ) { this->size = size; }; + + __cuda_callable__ + IndexType getSize() const { return this->size; }; + + __cuda_callable__ + bool operator[]( const IndexType& i ) const { return true; }; + + + protected: + + IndexType size; + +}; + +/** + * \brief Serialization of arrays into binary files. + */ +template< typename Device, typename Index, typename Allocator > +File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; + +template< typename Device, typename Index, typename Allocator > +File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; + +/** + * \brief Deserialization of arrays from binary files. + */ +template< typename Device, typename Index, typename Allocator > +File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; + +template< typename Device, typename Index, typename Allocator > +File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; + + + } //namespace details + } //namepsace Matrices +} //namespace TNL \ No newline at end of file -- GitLab From 888869bdf2467730c55c4a343738f322f44b9806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 28 Jan 2020 21:19:13 +0100 Subject: [PATCH 118/179] Adding matrix values holder. 
--- src/TNL/Matrices/Legacy/Sparse.h | 2 +- src/TNL/Matrices/Matrix.h | 12 +-- src/TNL/Matrices/Matrix.hpp | 108 ++++++++++++++---------- src/TNL/Matrices/details/ValuesHolder.h | 62 +++++++++++--- 4 files changed, 120 insertions(+), 64 deletions(-) diff --git a/src/TNL/Matrices/Legacy/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h index 12c76a6a0..8970b182e 100644 --- a/src/TNL/Matrices/Legacy/Sparse.h +++ b/src/TNL/Matrices/Legacy/Sparse.h @@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index > typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; - typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; + typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesHolderType ValuesVector; typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; typedef SparseRow< RealType, IndexType > MatrixRow; diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index a5f2b6b8f..b30a28718 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -16,6 +16,7 @@ #include #include #include +#include namespace TNL { /** @@ -26,7 +27,8 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, + typename ValuesHolder = typename details::ValuesHolder< Real, Device, Index, RealAllocator > > class Matrix : public Object { public: @@ -36,7 +38,7 @@ public: using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; - using ValuesVector = 
Containers::Vector< RealType, DeviceType, IndexType, RealAllocator >; + using ValuesHolderType = ValuesHolder; using RealAllocatorType = RealAllocator; using ViewType = MatrixView< Real, Device, Index >; using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >; @@ -90,9 +92,9 @@ public: virtual Real getElement( const IndexType row, const IndexType column ) const = 0; - const ValuesVector& getValues() const; + const ValuesHolderType& getValues() const; - ValuesVector& getValues(); + ValuesHolderType& getValues(); // TODO: parallelize and optimize for sparse matrices template< typename Matrix > @@ -131,7 +133,7 @@ public: IndexType rows, columns, numberOfColors; - ValuesVector values; + ValuesHolderType values; }; template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 2d5906d23..29dedcf2b 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -22,20 +22,22 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - typename RealAllocator > -Matrix< Real, Device, Index, RealAllocator >:: + typename RealAllocator, + typename ValuesHolder > +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: Matrix( const RealAllocatorType& allocator ) : rows( 0 ), columns( 0 ), - values( allocator ) + values( allocator ) { } template< typename Real, typename Device, typename Index, - typename RealAllocator > -Matrix< Real, Device, Index, RealAllocator >:: + typename RealAllocator, + typename ValuesHolder > +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator ) : rows( rows_ ), columns( columns_ ), @@ -46,8 +48,9 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator 
>::setDimensions( const IndexType rows, + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions( const IndexType rows, const IndexType columns ) { TNL_ASSERT( rows > 0 && columns > 0, @@ -60,7 +63,7 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp typename Device, typename Index, typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const { rowLengths.setSize( this->getRows() ); getCompressedRowLengths( rowLengths.getView() ); @@ -69,8 +72,9 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); for( IndexType row = 0; row < this->getRows(); row++ ) @@ -80,9 +84,10 @@ void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( Comp template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > template< typename Matrix_ > -void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix ) +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const Matrix_& matrix ) { setDimensions( matrix.getRows(), matrix.getColumns() ); } @@ -90,8 +95,9 @@ void Matrix< Real, Device, Index, 
RealAllocator >::setLike( const Matrix_& matri template< typename Real, typename Device, typename Index, - typename RealAllocator > -Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const + typename RealAllocator, + typename ValuesHolder > +Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedElementsCount() const { return this->values.getSize(); } @@ -99,8 +105,9 @@ Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() template< typename Real, typename Device, typename Index, - typename RealAllocator > -Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const + typename RealAllocator, + typename ValuesHolder > +Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNonzeroMatrixElements() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { @@ -112,9 +119,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElem template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > __cuda_callable__ -Index Matrix< Real, Device, Index, RealAllocator >::getRows() const +Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() const { return this->rows; } @@ -122,9 +130,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getRows() const template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > __cuda_callable__ -Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const +Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() const { return this->columns; } @@ -132,9 +141,10 @@ Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const template< typename Real, typename Device, typename Index, - typename 
RealAllocator > -const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& -Matrix< Real, Device, Index, RealAllocator >:: + typename RealAllocator, + typename ValuesHolder > +const typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType& +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: getValues() const { return this->values; @@ -143,9 +153,10 @@ getValues() const template< typename Real, typename Device, typename Index, - typename RealAllocator > -typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector& -Matrix< Real, Device, Index, RealAllocator >:: + typename RealAllocator, + typename ValuesHolder > +typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType& +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: getValues() { return this->values; @@ -154,8 +165,9 @@ getValues() template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::reset() + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset() { this->rows = 0; this->columns = 0; @@ -165,9 +177,10 @@ void Matrix< Real, Device, Index, RealAllocator >::reset() template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > template< typename MatrixT > -bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( const MatrixT& matrix ) const { if( this->getRows() != matrix.getRows() || this->getColumns() != matrix.getColumns() ) @@ -182,9 +195,10 @@ bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > template< 
typename MatrixT > -bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( const MatrixT& matrix ) const { return ! operator == ( matrix ); } @@ -192,8 +206,9 @@ bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& file ) const { Object::save( file ); file.save( &this->rows ); @@ -204,8 +219,9 @@ void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::load( File& file ) + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& file ) { Object::load( file ); file.load( &this->rows ); @@ -216,18 +232,20 @@ void Matrix< Real, Device, Index, RealAllocator >::load( File& file ) template< typename Real, typename Device, typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const + typename RealAllocator, + typename ValuesHolder > +void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::print( std::ostream& str ) const { } template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > __cuda_callable__ const Index& -Matrix< Real, Device, Index, RealAllocator >:: +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: getNumberOfColors() const { return this->numberOfColors; @@ -236,9 +254,10 @@ getNumberOfColors() const template< typename Real, typename Device, 
typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > void -Matrix< Real, Device, Index, RealAllocator >:: +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: computeColorsVector(Containers::Vector &colorsVector) { for( IndexType i = this->getRows() - 1; i >= 0; i-- ) @@ -274,9 +293,10 @@ computeColorsVector(Containers::Vector &colorsVector) template< typename Real, typename Device, typename Index, - typename RealAllocator > + typename RealAllocator, + typename ValuesHolder > void -Matrix< Real, Device, Index, RealAllocator >:: +Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix ) { this->numberOfColors = matrix.getNumberOfColors(); diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h index e54ec8026..9b4ab7540 100644 --- a/src/TNL/Matrices/details/ValuesHolder.h +++ b/src/TNL/Matrices/details/ValuesHolder.h @@ -23,9 +23,8 @@ class ValuesHolder {}; template< typename Device, - typename Index, - typename RealAllocator > -class ValuesHolder< bool, Device, Index, RealAllocator > + typename Index > +class BooleanValuesHolder { public: @@ -33,10 +32,10 @@ class ValuesHolder< bool, Device, Index, RealAllocator > using DeviceType = Device; using IndexType = Index; - ValuesHolder() + BooleanValuesHolder() : size( 0 ){}; - ValuesdHolder( const IndexType& size ) + BooleanValuesHolder( const IndexType& size ) : size( size ){}; void setSize( const IndexType& size ) { this->size = size; }; @@ -47,32 +46,67 @@ class ValuesHolder< bool, Device, Index, RealAllocator > __cuda_callable__ bool operator[]( const IndexType& i ) const { return true; }; - protected: IndexType size; - }; /** - * \brief Serialization of arrays into binary files. + * \brief Serialization of values holder into binary files. 
*/ template< typename Device, typename Index, typename Allocator > -File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; +File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) { + file << holder.getSize(); + return file; }; template< typename Device, typename Index, typename Allocator > -File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; +File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) { + file << holder.getSize(); + return file; }; /** - * \brief Deserialization of arrays from binary files. + * \brief Deserialization of values holder from binary files. */ template< typename Device, typename Index, typename Allocator > -File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; +File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) { + Index size; + file >> size; + holder.setSize( size ); + return file; }; template< typename Device, typename Index, typename Allocator > -File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& array ) { return file; }; +File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) { + Index size; + file >> size; + holder.setSize( size ); + return file; }; + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +struct ValuesHolderSetter +{ + using type = ValuesHolder< Real, Device, Index, RealAllocator >; +}; + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +struct SparseMatrixValuesHolderSetter +{ + using type = ValuesHolder< Real, Device, Index, RealAllocator >; +}; +template< typename Device, + typename Index, + typename RealAllocator > +struct SparseMatrixValuesHolderSetter< bool, Device, Index, RealAllocator > +{ + using type = 
BooleanValuesHolder< Device, Index >; +}; } //namespace details } //namepsace Matrices -} //namespace TNL \ No newline at end of file +} //namespace TNL -- GitLab From 4a70c74f04beae9c9d204e4acbb1d6b34116959d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 29 Jan 2020 17:29:17 +0100 Subject: [PATCH 119/179] Added matrix values holder. --- src/TNL/Matrices/Dense.h | 4 +- src/TNL/Matrices/DenseMatrixView.h | 4 +- src/TNL/Matrices/Multidiagonal.h | 4 +- src/TNL/Matrices/SparseMatrix.h | 4 +- src/TNL/Matrices/Tridiagonal.h | 4 +- src/TNL/Matrices/details/ValuesHolder.h | 50 +++++++++++++++++++++++-- 6 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 8c109ac1e..cee69688b 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -36,8 +36,8 @@ class Dense : public Matrix< Real, Device, Index > using IndexType = Index; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using ValuesType = typename BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; + using ValuesHolderType = typename BaseType::ValuesHolderType; + using ValuesViewType = typename ValuesHolderType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index d963dd7c3..c5771f2ee 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -39,8 +39,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > using DeviceType = Device; using IndexType = Index; using BaseType = Matrix< Real, Device, Index >; - using ValuesType = typename 
BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; + using ValuesHolderType = typename BaseType::ValuesHolderType; + using ValuesViewType = typename ValuesHolderType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentsViewType = typename SegmentsType::ViewType; using SegmentViewType = typename SegmentsType::SegmentViewType; diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 749ddfae7..c93dc7d9c 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -35,8 +35,8 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > using RealAllocatorType = RealAllocator; using IndexAllocatorType = IndexAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using ValuesType = typename BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; + using ValuesHolderType = typename BaseType::ValuesHolderType; + using ValuesViewType = typename ValuesHolderType::ViewType; using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 9f91ee7d1..ffcccfade 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -46,8 +46,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; - using ValuesVectorType = 
typename Matrix< Real, Device, Index, RealAllocator >::ValuesVector; - using ValuesViewType = typename ValuesVectorType::ViewType; + using ValuesHolderType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesHolderType; + using ValuesViewType = typename ValuesHolderType::ViewType; using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 6f0c6a548..b65cfb527 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -34,8 +34,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; - using ValuesType = typename BaseType::ValuesVector; - using ValuesViewType = typename ValuesType::ViewType; + using ValuesHolderType = typename BaseType::ValuesHolderType; + using ValuesViewType = typename ValuesHolderType::ViewType; using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h index 9b4ab7540..c76f0d344 100644 --- a/src/TNL/Matrices/details/ValuesHolder.h +++ b/src/TNL/Matrices/details/ValuesHolder.h @@ -14,13 +14,54 @@ namespace TNL { namespace Matrices { namespace details { + +template< typename Real, + typename Device, + typename Index > +struct ValuesHolderView +: public Containers::VectorView< Real, Device, Index > +{ + using RealType 
= Real; + using DeviceType = Device; + using IndexType = Index; + + using Containers::VectorView< Real, Device, Index >::VectorView; + using Containers::VectorView< Real, Device, Index >::operator=; + /*__cuda_callable__ + ValuesHolderView() = default; + + __cuda_callable__ + explicit ValuesHolderView( const ValuesHolderView& ) = default; + + __cuda_callable__ + ValuesHolderView( ValuesHolderView&& ) = default;*/ + +}; + template< typename Real, typename Device, typename Index, - typename RealAllocator > -class ValuesHolder -: public Containers::Vector< Real, Device, Index, RealAllocator > -{}; + typename Allocator > +struct ValuesHolder +: public Containers::Vector< Real, Device, Index, Allocator > +{ + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using AllocatorType = Allocator; + using ViewType = ValuesHolderView< Real, Device, Index >; + + using Containers::Vector< Real, Device, Index, Allocator >::Vector; + using Containers::Vector< Real, Device, Index, Allocator >::operator=; + /*ValuesHolder() = default; + + explicit ValuesHolder( const ValuesHolder& ) = default; + + explicit ValuesHolder( const ValuesHolder& vector, const AllocatorType& allocator ); + + ValuesHolder( ValuesHolder&& ) = default;*/ + +}; template< typename Device, typename Index > @@ -31,6 +72,7 @@ class BooleanValuesHolder using RealType = bool; using DeviceType = Device; using IndexType = Index; + using ViewType = BooleanValuesHolder; BooleanValuesHolder() : size( 0 ){}; -- GitLab From d502b0758edaf8b11c6a6a525e558efcd53918bd Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 31 Jan 2020 10:46:15 +0100 Subject: [PATCH 120/179] ValuesHolder for matrices was not good idea :-). 
--- src/TNL/Matrices/Dense.h | 4 +- src/TNL/Matrices/DenseMatrixView.h | 4 +- src/TNL/Matrices/Legacy/Sparse.h | 2 +- src/TNL/Matrices/Matrix.h | 12 +- src/TNL/Matrices/Matrix.hpp | 106 +- src/TNL/Matrices/MatrixView.h | 4 +- src/TNL/Matrices/MatrixView.hpp | 26 +- src/TNL/Matrices/Multidiagonal.hpp | 5 +- src/TNL/Matrices/SparseMatrix.h | 11 +- src/TNL/Matrices/SparseMatrix.hpp | 17 +- src/TNL/Matrices/SparseMatrixRowView.h | 9 +- src/TNL/Matrices/SparseMatrixRowView.hpp | 40 +- src/TNL/Matrices/SparseMatrixView.h | 8 +- src/TNL/Matrices/Tridiagonal.hpp | 1 - src/TNL/Matrices/details/ValuesHolder.h | 154 -- .../Matrices/BinarySparseMatrixTest.cpp | 11 + .../Matrices/BinarySparseMatrixTest.cu | 11 + .../Matrices/BinarySparseMatrixTest.h | 117 ++ .../Matrices/BinarySparseMatrixTest.hpp | 1573 +++++++++++++++++ src/UnitTests/Matrices/CMakeLists.txt | 8 + 20 files changed, 1845 insertions(+), 278 deletions(-) delete mode 100644 src/TNL/Matrices/details/ValuesHolder.h create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.cpp create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.cu create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.h create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest.hpp diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index cee69688b..ada48ee02 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -36,8 +36,8 @@ class Dense : public Matrix< Real, Device, Index > using IndexType = Index; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using ValuesHolderType = typename BaseType::ValuesHolderType; - using ValuesViewType = typename ValuesHolderType::ViewType; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< 
IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index c5771f2ee..95a7c4769 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -39,8 +39,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > using DeviceType = Device; using IndexType = Index; using BaseType = Matrix< Real, Device, Index >; - using ValuesHolderType = typename BaseType::ValuesHolderType; - using ValuesViewType = typename ValuesHolderType::ViewType; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentsViewType = typename SegmentsType::ViewType; using SegmentViewType = typename SegmentsType::SegmentViewType; diff --git a/src/TNL/Matrices/Legacy/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h index 8970b182e..4de00cb2e 100644 --- a/src/TNL/Matrices/Legacy/Sparse.h +++ b/src/TNL/Matrices/Legacy/Sparse.h @@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index > typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; - typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesHolderType ValuesVector; + typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVectorType ValuesVector; typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; typedef SparseRow< RealType, IndexType > MatrixRow; diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index b30a28718..cf61f9efa 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -16,7 +16,6 @@ #include 
#include #include -#include namespace TNL { /** @@ -27,8 +26,7 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, - typename ValuesHolder = typename details::ValuesHolder< Real, Device, Index, RealAllocator > > + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Matrix : public Object { public: @@ -38,7 +36,7 @@ public: using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; - using ValuesHolderType = ValuesHolder; + using ValuesVectorType = Containers::Vector< Real, Device, Index, RealAllocator >; using RealAllocatorType = RealAllocator; using ViewType = MatrixView< Real, Device, Index >; using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >; @@ -92,9 +90,9 @@ public: virtual Real getElement( const IndexType row, const IndexType column ) const = 0; - const ValuesHolderType& getValues() const; + const ValuesVectorType& getValues() const; - ValuesHolderType& getValues(); + ValuesVectorType& getValues(); // TODO: parallelize and optimize for sparse matrices template< typename Matrix > @@ -133,7 +131,7 @@ public: IndexType rows, columns, numberOfColors; - ValuesHolderType values; + ValuesVectorType values; }; template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 29dedcf2b..0236f94f7 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -22,9 +22,8 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -Matrix< Real, Device, Index, 
RealAllocator, ValuesHolder >:: + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: Matrix( const RealAllocatorType& allocator ) : rows( 0 ), columns( 0 ), @@ -35,9 +34,8 @@ Matrix( const RealAllocatorType& allocator ) template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator ) : rows( rows_ ), columns( columns_ ), @@ -48,9 +46,8 @@ Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions( const IndexType rows, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows, const IndexType columns ) { TNL_ASSERT( rows > 0 && columns > 0, @@ -63,7 +60,7 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setDimensions( typename Device, typename Index, typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const +void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const { rowLengths.setSize( this->getRows() ); getCompressedRowLengths( rowLengths.getView() ); @@ -72,9 +69,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRo template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const + typename RealAllocator > 
+void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); for( IndexType row = 0; row < this->getRows(); row++ ) @@ -84,10 +80,9 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getCompressedRo template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > template< typename Matrix_ > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const Matrix_& matrix ) +void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix ) { setDimensions( matrix.getRows(), matrix.getColumns() ); } @@ -95,9 +90,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::setLike( const template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedElementsCount() const + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const { return this->values.getSize(); } @@ -105,9 +99,8 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getAllocatedEl template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNonzeroMatrixElements() const + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { @@ -119,10 +112,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getNumberOfNon template< typename Real, typename Device, typename Index, - typename 
RealAllocator, - typename ValuesHolder > + typename RealAllocator > __cuda_callable__ -Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() const +Index Matrix< Real, Device, Index, RealAllocator >::getRows() const { return this->rows; } @@ -130,10 +122,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getRows() cons template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > __cuda_callable__ -Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() const +Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const { return this->columns; } @@ -141,10 +132,9 @@ Index Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::getColumns() c template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -const typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType& -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: + typename RealAllocator > +const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType& +Matrix< Real, Device, Index, RealAllocator >:: getValues() const { return this->values; @@ -153,10 +143,9 @@ getValues() const template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -typename Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::ValuesHolderType& -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: + typename RealAllocator > +typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType& +Matrix< Real, Device, Index, RealAllocator >:: getValues() { return this->values; @@ -165,9 +154,8 @@ getValues() template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset() + typename RealAllocator > +void 
Matrix< Real, Device, Index, RealAllocator >::reset() { this->rows = 0; this->columns = 0; @@ -177,10 +165,9 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::reset() template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > template< typename MatrixT > -bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const { if( this->getRows() != matrix.getRows() || this->getColumns() != matrix.getColumns() ) @@ -195,10 +182,9 @@ bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator == ( c template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > template< typename MatrixT > -bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( const MatrixT& matrix ) const +bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const { return ! 
operator == ( matrix ); } @@ -206,9 +192,8 @@ bool Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::operator != ( c template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& file ) const + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const { Object::save( file ); file.save( &this->rows ); @@ -219,9 +204,8 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::save( File& fil template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& file ) + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::load( File& file ) { Object::load( file ); file.load( &this->rows ); @@ -232,20 +216,18 @@ void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::load( File& fil template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > -void Matrix< Real, Device, Index, RealAllocator, ValuesHolder >::print( std::ostream& str ) const + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const { } template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > __cuda_callable__ const Index& -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: +Matrix< Real, Device, Index, RealAllocator >:: getNumberOfColors() const { return this->numberOfColors; @@ -254,10 +236,9 @@ getNumberOfColors() const template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > void -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: +Matrix< Real, Device, Index, RealAllocator >:: 
computeColorsVector(Containers::Vector &colorsVector) { for( IndexType i = this->getRows() - 1; i >= 0; i-- ) @@ -293,10 +274,9 @@ computeColorsVector(Containers::Vector &colorsVector) template< typename Real, typename Device, typename Index, - typename RealAllocator, - typename ValuesHolder > + typename RealAllocator > void -Matrix< Real, Device, Index, RealAllocator, ValuesHolder >:: +Matrix< Real, Device, Index, RealAllocator >:: copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix ) { this->numberOfColors = matrix.getNumberOfColors(); diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index 2a6429df5..cd1beda9c 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -95,11 +95,11 @@ public: * \brief Shallow copy of the matrix view. * * @param view - * @return + * @return */ __cuda_callable__ MatrixView& operator=( const MatrixView& view ); - + // TODO: parallelize and optimize for sparse matrices template< typename Matrix > bool operator == ( const Matrix& matrix ) const; diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 360478d05..363fec208 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -35,7 +35,7 @@ template< typename Real, typename Index > __cuda_callable__ MatrixView< Real, Device, Index >:: -MatrixView( const IndexType rows_, +MatrixView( const IndexType rows_, const IndexType columns_, const ValuesView& values_ ) : rows( rows_ ), columns( columns_ ), values( values_ ) @@ -45,7 +45,9 @@ MatrixView( const IndexType rows_, template< typename Real, typename Device, typename Index > -void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const +void +MatrixView< Real, Device, Index >:: +getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const { rowLengths.setSize( this->getRows() ); getCompressedRowLengths( rowLengths.getView() ); @@ -54,7 +56,9 @@ void 
MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe template< typename Real, typename Device, typename Index > -void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +void +MatrixView< Real, Device, Index >:: +getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const { TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); for( IndexType row = 0; row < this->getRows(); row++ ) @@ -64,7 +68,9 @@ void MatrixView< Real, Device, Index >::getCompressedRowLengths( CompressedRowLe template< typename Real, typename Device, typename Index > -Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const +Index +MatrixView< Real, Device, Index >:: +getAllocatedElementsCount() const { return this->values.getSize(); } @@ -72,7 +78,9 @@ Index MatrixView< Real, Device, Index >::getAllocatedElementsCount() const template< typename Real, typename Device, typename Index > -Index MatrixView< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const +Index +MatrixView< Real, Device, Index >:: +getNumberOfNonzeroMatrixElements() const { const auto values_view = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { @@ -109,12 +117,12 @@ getValues() const { return this->values; } - + template< typename Real, typename Device, typename Index > __cuda_callable__ -typename MatrixView< Real, Device, Index >::ValuesView& +typename MatrixView< Real, Device, Index >::ValuesView& MatrixView< Real, Device, Index >:: getValues() { @@ -124,7 +132,7 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -MatrixView< Real, Device, Index >& +MatrixView< Real, Device, Index >& MatrixView< Real, Device, Index >:: operator=( const MatrixView& view ) { @@ -202,7 +210,7 @@ getNumberOfColors() const template< typename Real, typename Device, typename Index > -void +void 
MatrixView< Real, Device, Index >:: computeColorsVector(Containers::Vector &colorsVector) { diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 659d6d4eb..e8eb66751 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -277,7 +277,6 @@ Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); } template< typename Real, @@ -488,7 +487,7 @@ template< typename Real, typename IndexAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType +typename Vector::RealType Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { @@ -503,7 +502,7 @@ template< typename Real, typename IndexAllocator > template< typename InVector, typename OutVector > -void +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index ffcccfade..49e3b45bb 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -31,6 +31,8 @@ template< typename Real, class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > { public: + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + static constexpr bool isBinary() { return std::is_same< Real, bool >::value; }; using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > @@ -43,24 +45,23 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using IndexType = Index; using RealAllocatorType = RealAllocator; using IndexAllocatorType = IndexAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; using RowsCapacitiesType = Containers::Vector< 
IndexType, DeviceType, IndexType, IndexAllocatorType >; using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; - using ValuesHolderType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesHolderType; - using ValuesViewType = typename ValuesHolderType::ViewType; + using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; - SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index d3641e030..60f4695f0 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -27,7 +27,7 @@ template< typename 
Real, SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: SparseMatrix( const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) - : Matrix< Real, Device, Index, RealAllocator >( realAllocator ), columnIndexes( indexAllocator ) + : BaseType( realAllocator ), columnIndexes( indexAllocator ) { } @@ -69,7 +69,7 @@ SparseMatrix( const IndexType rows, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) -: Matrix< Real, Device, Index, RealAllocator >( rows, columns, realAllocator ), columnIndexes( indexAllocator ) +: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { } @@ -162,8 +162,11 @@ setCompressedRowLengths( const RowsCapacitiesVector& rowsCapacities ) thisRowsCapacities = rowsCapacities; this->segments.setSegmentsSizes( thisRowsCapacities ); } - this->values.setSize( this->segments.getStorageSize() ); - this->values = ( RealType ) 0; + if( ! isBinary() ) + { + this->values.setSize( this->segments.getStorageSize() ); + this->values = ( RealType ) 0; + } this->columnIndexes.setSize( this->segments.getStorageSize() ); this->columnIndexes = this->getPaddingIndex(); this->view = this->getView(); @@ -196,7 +199,7 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: setLike( const Matrix_& matrix ) { - Matrix< Real, Device, Index, RealAllocator >::setLike( matrix ); + BaseType::setLike( matrix ); } template< typename Real, @@ -224,7 +227,7 @@ void SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: reset() { - Matrix< Real, Device, Index >::reset(); + BaseType::reset(); } template< typename Real, @@ -761,7 +764,7 @@ operator=( const RHSMatrix& matrix ) TNL_ASSERT_LT( bufferIdx, bufferSize, "" ); inValue = thisValuesBuffer_view[ bufferIdx ]; } - //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << 
bufferLocalIdx + //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx // << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl; rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx; if( inValue == 0.0 ) diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h index 19445f531..8906ab5ae 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.h +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -1,4 +1,4 @@ -/*************************************************************************** + /*************************************************************************** SparseMatrixRowView.h - description ------------------- begin : Dec 28, 2019 @@ -15,7 +15,8 @@ namespace TNL { template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > class SparseMatrixRowView { public: @@ -26,6 +27,8 @@ class SparseMatrixRowView using ValuesViewType = ValuesView; using ColumnsIndexesViewType = ColumnsIndexesView; + static constexpr bool isBinary() { return isBinary_; }; + __cuda_callable__ SparseMatrixRowView( const SegmentViewType& segmentView, const ValuesViewType& values, @@ -39,7 +42,7 @@ class SparseMatrixRowView __cuda_callable__ IndexType& getColumnIndex( const IndexType localIdx ); - + __cuda_callable__ const RealType& getValue( const IndexType localIdx ) const; diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index 70dac874e..ab5b4622b 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -17,9 +17,10 @@ namespace TNL { template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ 
>:: SparseMatrixRowView( const SegmentViewType& segmentView, const ValuesViewType& values, const ColumnsIndexesViewType& columnIndexes ) @@ -29,9 +30,10 @@ SparseMatrixRowView( const SegmentViewType& segmentView, template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ auto -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getSize() const -> IndexType { return segmentView.getSize(); @@ -39,9 +41,10 @@ getSize() const -> IndexType template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ auto -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getColumnIndex( const IndexType localIdx ) const -> const IndexType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); @@ -50,9 +53,10 @@ getColumnIndex( const IndexType localIdx ) const -> const IndexType& template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ auto -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getColumnIndex( const IndexType localIdx ) -> IndexType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." 
); @@ -61,9 +65,10 @@ getColumnIndex( const IndexType localIdx ) -> IndexType& template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ auto -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getValue( const IndexType localIdx ) const -> const RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); @@ -72,9 +77,10 @@ getValue( const IndexType localIdx ) const -> const RealType& template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > + typename ColumnsIndexesView, + bool isBinary_ > __cuda_callable__ auto -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getValue( const IndexType localIdx ) -> RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); @@ -83,9 +89,10 @@ getValue( const IndexType localIdx ) -> RealType& template< typename SegmentView, typename ValuesView, - typename ColumnsIndexesView > -__cuda_callable__ void -SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView >:: + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ void +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: setElement( const IndexType localIdx, const IndexType column, const RealType& value ) @@ -93,7 +100,8 @@ setElement( const IndexType localIdx, TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); const IndexType globalIdx = segmentView.getGlobalIndex( localIdx ); columnIndexes[ globalIdx ] = column; - values[ globalIdx ] = value; + if( ! 
isBinary() ) + values[ globalIdx ] = value; } diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 7168e1e8e..d8c6eb63f 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -27,6 +27,8 @@ template< typename Real, class SparseMatrixView : public MatrixView< Real, Device, Index > { public: + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + static constexpr bool isBinary() { return std::is_same< Real, bool >::value; }; using RealType = Real; template< typename Device_, typename Index_ > @@ -35,20 +37,20 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > using SegmentViewType = typename SegmentsViewType::SegmentViewType; using DeviceType = Device; using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; - using ValuesViewType = typename MatrixView< Real, Device, Index >::ValuesView; + using ValuesViewType = typename BaseType::ValuesView; using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; - using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; // TODO: remove this - it is here only for compatibility with original matrix implementation typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; 
typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; __cuda_callable__ SparseMatrixView(); diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 2ccdc4838..3ddabc985 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -208,7 +208,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); } template< typename Real, diff --git a/src/TNL/Matrices/details/ValuesHolder.h b/src/TNL/Matrices/details/ValuesHolder.h deleted file mode 100644 index c76f0d344..000000000 --- a/src/TNL/Matrices/details/ValuesHolder.h +++ /dev/null @@ -1,154 +0,0 @@ -/*************************************************************************** - ValuesHolder.h - description - ------------------- - begin : Jan 27, 2020 - copyright : (C) 2020 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { - namespace Matrices { - namespace details { - - -template< typename Real, - typename Device, - typename Index > -struct ValuesHolderView -: public Containers::VectorView< Real, Device, Index > -{ - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - - using Containers::VectorView< Real, Device, Index >::VectorView; - using Containers::VectorView< Real, Device, Index >::operator=; - /*__cuda_callable__ - ValuesHolderView() = default; - - __cuda_callable__ - explicit ValuesHolderView( const ValuesHolderView& ) = default; - - __cuda_callable__ - ValuesHolderView( ValuesHolderView&& ) = default;*/ - -}; - -template< typename Real, - typename Device, - typename Index, - typename Allocator > -struct ValuesHolder -: public Containers::Vector< Real, 
Device, Index, Allocator > -{ - using RealType = Real; - using DeviceType = Device; - using IndexType = Index; - using AllocatorType = Allocator; - using ViewType = ValuesHolderView< Real, Device, Index >; - - using Containers::Vector< Real, Device, Index, Allocator >::Vector; - using Containers::Vector< Real, Device, Index, Allocator >::operator=; - /*ValuesHolder() = default; - - explicit ValuesHolder( const ValuesHolder& ) = default; - - explicit ValuesHolder( const ValuesHolder& vector, const AllocatorType& allocator ); - - ValuesHolder( ValuesHolder&& ) = default;*/ - -}; - -template< typename Device, - typename Index > -class BooleanValuesHolder -{ - public: - - using RealType = bool; - using DeviceType = Device; - using IndexType = Index; - using ViewType = BooleanValuesHolder; - - BooleanValuesHolder() - : size( 0 ){}; - - BooleanValuesHolder( const IndexType& size ) - : size( size ){}; - - void setSize( const IndexType& size ) { this->size = size; }; - - __cuda_callable__ - IndexType getSize() const { return this->size; }; - - __cuda_callable__ - bool operator[]( const IndexType& i ) const { return true; }; - - protected: - - IndexType size; -}; - -/** - * \brief Serialization of values holder into binary files. - */ -template< typename Device, typename Index, typename Allocator > -File& operator<<( File& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) { - file << holder.getSize(); - return file; }; - -template< typename Device, typename Index, typename Allocator > -File& operator<<( File&& file, const ValuesHolder< bool, Device, Index, Allocator >& holder ) { - file << holder.getSize(); - return file; }; - -/** - * \brief Deserialization of values holder from binary files. 
- */ -template< typename Device, typename Index, typename Allocator > -File& operator>>( File& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) { - Index size; - file >> size; - holder.setSize( size ); - return file; }; - -template< typename Device, typename Index, typename Allocator > -File& operator>>( File&& file, ValuesHolder< bool, Device, Index, Allocator >& holder ) { - Index size; - file >> size; - holder.setSize( size ); - return file; }; - -template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -struct ValuesHolderSetter -{ - using type = ValuesHolder< Real, Device, Index, RealAllocator >; -}; - -template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -struct SparseMatrixValuesHolderSetter -{ - using type = ValuesHolder< Real, Device, Index, RealAllocator >; -}; - -template< typename Device, - typename Index, - typename RealAllocator > -struct SparseMatrixValuesHolderSetter< bool, Device, Index, RealAllocator > -{ - using type = BooleanValuesHolder< Device, Index >; -}; - - } //namespace details - } //namepsace Matrices -} //namespace TNL diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp new file mode 100644 index 000000000..ea7b8d3c9 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest.cu new file mode 100644 index 000000000..916f14360 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest.h new file mode 100644 index 000000000..0abba5b86 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.h @@ -0,0 +1,117 @@ +/*************************************************************************** + BinarySparseMatrixTest.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "BinarySparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class CSRMatrixTest : public ::testing::Test +{ +protected: + using CSRMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using CSRMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, +#endif +>; + +TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); + +TYPED_TEST( CSRMatrixTest, setDimensionsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetDimensions< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetCompressedRowLengths< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setLikeTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetLike< CSRMatrixType, CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, resetTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Reset< 
CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, getRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_GetRow< CSRMatrixType >(); +} + + +TYPED_TEST( CSRMatrixTest, setElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetElement< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, vectorProductTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_VectorProduct< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, rowsReduction ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_RowsReduction< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest" ); +} + +TYPED_TEST( CSRMatrixTest, printTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Print< CSRMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp new file mode 100644 index 000000000..5e969e976 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -0,0 +1,1573 @@ +/*************************************************************************** + SparseMatrixTest_impl.h - description + ------------------- + begin : Nov 22, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include +#include + +// Temporary, until test_OperatorEquals doesn't work for all formats. +#include +#include +#include + +#ifdef HAVE_GTEST +#include + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); + + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. 
+ + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, 1 ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); + + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 
0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + } + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m; + m.setDimensions( rows, cols ); + + m.reset(); + + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse 
matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + /*RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + }*/ + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + switch( rowIdx ) + { + case 0: + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, 1 ); + break; + case 1: + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, 1 ); + break; + case 2: + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, 1 ); + break; + case 3: + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, 1 ); + break; + case 4: + row.setElement( 0, 0, 1 ); + break; + case 5: + row.setElement( 0, 0, 1 ); + break; + case 6: + row.setElement( 0, 0, 1 ); + 
break; + case 7: + row.setElement( 0, 0, 1 ); + break; + case 8: + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, 1 ); + break; + case 9: + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, 1 ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( 
m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 
0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, 1 ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, 1 ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, 1 ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + 
EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 
), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 ), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); +} + +template< 
typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); + + m_1.setElement( 0, 0, 1 ); // 0th row + + m_1.setElement( 1, 1, 1 ); // 1st row + m_1.setElement( 1, 3, 1 ); + + m_1.setElement( 2, 1, 1 ); // 2nd row + + m_1.setElement( 3, 2, 1 ); // 3rd row + + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); + + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); + + + m_1.vectorProduct( inVector_1, outVector_1 ); + + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2; + m_2.reset(); + m_2.setDimensions( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2; + rowLengths_2.setSize( m_rows_2 ); + rowLengths_2.setValue( 3 ); + rowLengths_2.setElement( 1, 1 ); + 
rowLengths_2.setElement( 3, 1 ); + m_2.setCompressedRowLengths( rowLengths_2 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, 1 ); + + m_2.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, 1 ); + + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); + + VectorType outVector_2; + outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + + m_2.vectorProduct( inVector_2, outVector_2 ); + + + EXPECT_EQ( outVector_2.getElement( 0 ), 1 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 1 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 1 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 1 ); + + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3; + m_3.reset(); + m_3.setDimensions( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3; + rowLengths_3.setSize( m_rows_3 ); + rowLengths_3.setValue( 3 ); + m_3.setCompressedRowLengths( rowLengths_3 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, 1 ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, 1 ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, 1 ); + + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); + + + m_3.vectorProduct( 
inVector_3, outVector_3 ); + + + EXPECT_EQ( outVector_3.getElement( 0 ), 1 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 1 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 1 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 1 ); + + +/* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | + * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 34 35 0 0 0 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4; + m_4.reset(); + m_4.setDimensions( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4; + rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 ); + m_4.setCompressedRowLengths( rowLengths_4 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, 1 ); + + m_4.setElement( 0, 5, 1 ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, 1 ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, 1 ); + + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, 1 ); + + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); + + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + + + m_4.vectorProduct( inVector_4, outVector_4 ); + + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + 
EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5; + m_5.reset(); + m_5.setDimensions( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5; + rowLengths_5.setSize( m_rows_5 ); + rowLengths_5.setElement(0, 6); + rowLengths_5.setElement(1, 3); + rowLengths_5.setElement(2, 4); + rowLengths_5.setElement(3, 5); + rowLengths_5.setElement(4, 2); + rowLengths_5.setElement(5, 7); + rowLengths_5.setElement(6, 8); + rowLengths_5.setElement(7, 8); + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, 1 ); + + m_5.setElement( 0, 4, 1 ); // 0th row + m_5.setElement( 0, 5, 1 ); + + m_5.setElement( 1, 1, 1 ); // 1st row + m_5.setElement( 1, 3, 1 ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, 1 ); + + m_5.setElement( 2, 4, 1 ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, 1 ); + + m_5.setElement( 4, 1, 1 ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 
1); + + VectorType inVector_5; + inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 1 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities( rows ); + //rowLengths.setSize( rows ); + rowsCapacities.setElement(0, 6); + rowsCapacities.setElement(1, 3); + rowsCapacities.setElement(2, 4); + rowsCapacities.setElement(3, 5); + rowsCapacities.setElement(4, 2); + rowsCapacities.setElement(5, 7); + rowsCapacities.setElement(6, 8); + rowsCapacities.setElement(7, 8); + m.setCompressedRowLengths( rowsCapacities ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + m.setElement( 0, 4, 1 ); // 0th row + m.setElement( 0, 5, 1 ); + + m.setElement( 1, 1, 1 ); // 1st row + m.setElement( 1, 3, 1 ); + + for( IndexType i = 
1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + m.setElement( 2, 4, 1 ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + m.setElement( 4, 1, 1 ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + +template< 
typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +// This test is only for AdEllpack +template< typename Matrix > +void test_OperatorEquals() +{ + using 
RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) + return; + else + { + using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; + using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows = 8; + const IndexType m_cols = 8; + + AdELL_host m_host; + + m_host.reset(); + m_host.setDimensions( m_rows, m_cols ); + typename AdELL_host::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setElement(0, 6); + rowLengths.setElement(1, 3); + rowLengths.setElement(2, 4); + rowLengths.setElement(3, 5); + rowLengths.setElement(4, 2); + rowLengths.setElement(5, 7); + rowLengths.setElement(6, 8); + rowLengths.setElement(7, 8); + m_host.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_host.setElement( 0, i, value++ ); + + m_host.setElement( 0, 4, value++ ); // 0th row + m_host.setElement( 0, 5, value++ ); + + m_host.setElement( 1, 1, value++ ); // 1st row + m_host.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_host.setElement( 2, i, value++ ); + + m_host.setElement( 2, 4, value++ ); // 2nd row + + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_host.setElement( 3, i, value++ ); + + m_host.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_host.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_host.setElement( 6, i, value++ ); + + for( IndexType i 
= 0; i < 8; i++ ) // 7th row + m_host.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 + m_host.setElement( i, 7, 1); + + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( 
m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + AdELL_cuda m_cuda; + + // Copy the host matrix into the cuda matrix + m_cuda = m_host; + + // Reset the host matrix + m_host.reset(); + + // Copy the cuda matrix back into the host matrix + m_host = m_cuda; + + // Check the newly created double-copy host matrix + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + 
EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 
1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + // Try vectorProduct with copied cuda matrix to see if it works correctly. + using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; + + VectorType inVector; + inVector.setSize( m_cols ); + for( IndexType i = 0; i < inVector.getSize(); i++ ) + inVector.setElement( i, 2 ); + + VectorType outVector; + outVector.setSize( m_rows ); + for( IndexType j = 0; j < outVector.getSize(); j++ ) + outVector.setElement( j, 0 ); + + m_cuda.vectorProduct( inVector, outVector ); + + EXPECT_EQ( outVector.getElement( 0 ), 32 ); + EXPECT_EQ( outVector.getElement( 1 ), 28 ); + EXPECT_EQ( outVector.getElement( 2 ), 56 ); + EXPECT_EQ( outVector.getElement( 3 ), 102 ); + EXPECT_EQ( outVector.getElement( 4 ), 32 ); + EXPECT_EQ( outVector.getElement( 5 ), 224 ); + EXPECT_EQ( outVector.getElement( 6 ), 352 ); + EXPECT_EQ( outVector.getElement( 7 ), 520 ); + } +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix (every stored entry is set to 1): + * + * / 1 1 1 0 \ + * | 0 1 0 1 | + * | 1 1 1 0 | + * \ 0 1 1 1 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix savedMatrix; + savedMatrix.reset(); + savedMatrix.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + savedMatrix.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, 1 ); + + savedMatrix.setElement( 1, 1, 1 ); + 
savedMatrix.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, 1 ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix; + loadedMatrix.reset(); + loadedMatrix.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths2; + rowLengths2.setSize( m_rows ); + rowLengths2.setValue( 3 ); + loadedMatrix.setCompressedRowLengths( rowLengths2 ); + + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 
), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 1 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 1 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 5x4 sparse matrix (every stored entry is set to 1): + * + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * | 0 1 1 1 | + * \ 0 0 1 1 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + m.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + #include <sstream> + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); 
//all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 1 -> Col:3->1\t\n" + "Row: 2 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 3 -> Col:1->1 Col:2->1 Col:3->1\t\n" + "Row: 4 -> Col:2->1 Col:3->1\t\n"; + + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index b19c8b705..4b12e81a3 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -28,6 +28,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -65,6 +68,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -76,6 +83,7 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack_segments 
${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) -- GitLab From c7940ae7464d2e6dc128d3661e253daea982a133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Fri, 31 Jan 2020 16:46:00 +0100 Subject: [PATCH 121/179] Debugging binary sparse matrix. --- src/TNL/Matrices/Multidiagonal.h | 4 ++-- src/TNL/Matrices/Tridiagonal.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index c93dc7d9c..3b92d1db1 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -35,8 +35,8 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > using RealAllocatorType = RealAllocator; using IndexAllocatorType = IndexAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using ValuesHolderType = typename BaseType::ValuesHolderType; - using ValuesViewType = typename ValuesHolderType::ViewType; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index b65cfb527..029793681 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -34,8 +34,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > using RealAllocatorType = RealAllocator; using 
BaseType = Matrix< Real, Device, Index, RealAllocator >; using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; - using ValuesHolderType = typename BaseType::ValuesHolderType; - using ValuesViewType = typename ValuesHolderType::ViewType; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; -- GitLab From c63e73869e5cab71023ef378814a27f8ec869dfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 3 Feb 2020 17:13:32 +0100 Subject: [PATCH 122/179] Fixed asserts in Array/ArrayView::operator[]. --- src/TNL/Containers/Array.hpp | 8 ++++---- src/TNL/Containers/ArrayView.hpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 2a60986f5..1b8d6291f 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -510,9 +510,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." 
); @@ -530,9 +530,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 81e143ac2..3373fc6ab 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." 
); @@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); -- GitLab From cfe193b9ddfd087ebc0c601faabf53a37a788294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 3 Feb 2020 21:32:40 +0100 Subject: [PATCH 123/179] Reverting fixes of asserts in Array/ArrayView::operator[] since it is not accepted by nvcc. --- src/TNL/Containers/Array.hpp | 8 ++++---- src/TNL/Containers/ArrayView.hpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 1b8d6291f..2a60986f5 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -510,9 +510,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." 
); #else - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); @@ -530,9 +530,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 3373fc6ab..81e143ac2 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." 
); #else - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); @@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); -- GitLab From 9c3bbace343101a6594011d947568800ab928af5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 3 Feb 2020 21:33:17 +0100 Subject: [PATCH 124/179] Debugging binary sparse matrix. 
--- src/TNL/Matrices/MatrixType.h | 36 + src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrixRowView.hpp | 2 + src/TNL/Matrices/SparseMatrixView.h | 2 +- src/TNL/Matrices/SparseMatrixView.hpp | 42 +- .../Matrices/BinarySparseMatrixTest.h | 30 +- .../Matrices/BinarySparseMatrixTest.hpp | 1447 ++++++++--------- src/UnitTests/Matrices/SparseMatrixTest.hpp | 21 - 8 files changed, 790 insertions(+), 792 deletions(-) diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h index c5c8f6375..8d4cfe7ba 100644 --- a/src/TNL/Matrices/MatrixType.h +++ b/src/TNL/Matrices/MatrixType.h @@ -13,15 +13,51 @@ namespace TNL { namespace Matrices { +template< bool Symmetric, + bool Binary > +struct MatrixType +{ + static constexpr bool isSymmetric() { return Symmetric; } + + static constexpr bool isBinary() { return Binary; } + +}; + struct GeneralMatrix { static constexpr bool isSymmetric() { return false; } + + static constexpr bool isBinary() { return false; } }; struct SymmetricMatrix { static constexpr bool isSymmetric() { return true; } + + static constexpr bool isBinary() { return false; } +}; + +struct BinaryMatrix +{ + static constexpr bool isSymmetric() { return false; } + + static constexpr bool isBinary() { return true; } }; +struct BinarySymmetricMatrix +{ + static constexpr bool isSymmetric() { return true; } + + static constexpr bool isBinary() { return true; } +}; + +struct SymmetricBinaryMatrix +{ + static constexpr bool isSymmetric() { return true; } + + static constexpr bool isBinary() { return true; } +}; + + } //namespace Matrices } //namespace TNL \ No newline at end of file diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 49e3b45bb..7072ce3c4 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -32,7 +32,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > { public: static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); 
}; - static constexpr bool isBinary() { return std::is_same< Real, bool >::value; }; + static constexpr bool isBinary() { return MatrixType::isBinary(); }; using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index ab5b4622b..67d0845d4 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -72,6 +72,7 @@ SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getValue( const IndexType localIdx ) const -> const RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); return values[ segmentView.getGlobalIndex( localIdx ) ]; } @@ -84,6 +85,7 @@ SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: getValue( const IndexType localIdx ) -> RealType& { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." 
); return values[ segmentView.getGlobalIndex( localIdx ) ]; } diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index d8c6eb63f..2756c80d7 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -28,7 +28,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > { public: static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; - static constexpr bool isBinary() { return std::is_same< Real, bool >::value; }; + static constexpr bool isBinary() { return MatrixType::isBinary(); }; using RealType = Real; template< typename Device_, typename Index_ > diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 4ac0a29b8..5b043753f 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -227,7 +227,8 @@ addElement( const IndexType row, col = this->columnIndexes.getElement( globalIdx ); if( col == column ) { - this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); + if( ! isBinary() ) + this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); return; } if( col == this->getPaddingIndex() || col > column ) @@ -242,7 +243,8 @@ addElement( const IndexType row, if( col == this->getPaddingIndex() ) { this->columnIndexes.setElement( globalIdx, column ); - this->values.setElement( globalIdx, value ); + if( ! isBinary() ) + this->values.setElement( globalIdx, value ); return; } else @@ -255,7 +257,8 @@ addElement( const IndexType row, TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); - this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); + if( ! 
isBinary() ) + this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); j--; } @@ -287,7 +290,12 @@ getElement( const IndexType row, TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); const IndexType col = this->columnIndexes.getElement( globalIdx ); if( col == column ) - return this->values.getElement( globalIdx ); + { + if( isBinary() ) + return 1; + else + return this->values.getElement( globalIdx ); + } } return 0.0; } @@ -334,6 +342,8 @@ vectorProduct( const InVector& inVector, compute = ( column != paddingIndex ); if( ! compute ) return 0.0; + if( isBinary() ) + return inVectorView[ column ]; return valuesView[ globalIdx ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { @@ -382,7 +392,12 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { IndexType columnIdx = columns_view[ globalIdx ]; if( columnIdx != paddingIndex_ ) - return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); + { + if( isBinary() ) + return fetch( rowIdx, columnIdx, globalIdx, 1 ); + else + return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); + } return zero; }; this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); @@ -415,7 +430,10 @@ forRows( IndexType first, IndexType last, Function& function ) const const auto values_view = this->values.getConstView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool { - function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); + if( isBinary() ) + function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute ); + 
else + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); return true; }; this->segments.forSegments( first, last, f ); @@ -435,7 +453,10 @@ forRows( IndexType first, IndexType last, Function& function ) auto values_view = this->values.getView(); const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { - function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); + if( isBinary() ) + function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute ); + else + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); }; this->segments.forSegments( first, last, f ); } @@ -573,7 +594,12 @@ print( std::ostream& str ) const const IndexType column = this->columnIndexes.getElement( globalIdx ); if( column == this->getPaddingIndex() ) break; - str << " Col:" << column << "->" << this->values.getElement( globalIdx ) << "\t"; + RealType value; + if( isBinary() ) + value = 1.0; + else + value = this->values.getElement( globalIdx ); + str << " Col:" << column << "->" << value << "\t"; } str << std::endl; } diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest.h index 0abba5b86..cb0d0bab5 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.h @@ -29,13 +29,31 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > + 
TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, 
TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR > #endif >; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp index 5e969e976..3d1775972 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -153,16 +153,16 @@ void test_GetNumberOfNonzeroMatrixElements() /* * Sets up the following 10x10 sparse matrix: * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 
0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / */ const IndexType rows = 10; @@ -175,38 +175,37 @@ void test_GetNumberOfNonzeroMatrixElements() typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); + rowLengths.setElement( 0, 1 ); + rowLengths.setElement( 1, 1 ); + rowLengths.setElement( 2, 1 ); + rowLengths.setElement( 3, 1 ); for( IndexType i = 4; i < rows - 2; i++ ) { rowLengths.setElement( i, 1 ); } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); + rowLengths.setElement( 8, 1 ); + rowLengths.setElement( 9, 1 ); m.setCompressedRowLengths( rowLengths ); - RealType value = 1; for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); + m.setElement( 0, 2 * i, 1 ); for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); + m.setElement( 1, i, 1 ); for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); + m.setElement( 2, i, 1 ); for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); + m.setElement( 3, i, 1 ); for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); + m.setElement( i, 0, 1 ); for( IndexType j = 8; j < rows; j++) { for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); + m.setElement( j, i, 1 ); } EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); @@ -252,16 +251,16 @@ void test_GetRow() /* * Sets up the following 10x10 sparse matrix: * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 
14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / */ const IndexType rows = 10; @@ -283,27 +282,6 @@ void test_GetRow() rowLengths.setElement( 9, 10 ); m.setCompressedRowLengths( rowLengths ); - /*RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); - - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); - - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); - - for( IndexType j = 8; j < rows; j++) - { - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); - }*/ auto matrixView = m.getView(); auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); @@ -349,115 +327,115 @@ void test_GetRow() }; TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 2 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 3 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 4 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 5 ); - EXPECT_EQ( m.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m.getElement( 1, 2 ), 7 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - 
EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 8 ); - EXPECT_EQ( m.getElement( 2, 1 ), 9 ); - EXPECT_EQ( m.getElement( 2, 2 ), 10 ); - EXPECT_EQ( m.getElement( 2, 3 ), 11 ); - EXPECT_EQ( m.getElement( 2, 4 ), 12 ); - EXPECT_EQ( m.getElement( 2, 5 ), 13 ); - EXPECT_EQ( m.getElement( 2, 6 ), 14 ); - EXPECT_EQ( m.getElement( 2, 7 ), 15 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 18 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 19 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 20 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( 
m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 21 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 22 ); - EXPECT_EQ( m.getElement( 8, 1 ), 23 ); - EXPECT_EQ( m.getElement( 8, 2 ), 24 ); - EXPECT_EQ( m.getElement( 8, 3 ), 25 ); - EXPECT_EQ( m.getElement( 8, 4 ), 26 ); - EXPECT_EQ( m.getElement( 8, 5 ), 27 ); - EXPECT_EQ( m.getElement( 8, 6 ), 28 ); - EXPECT_EQ( m.getElement( 8, 7 ), 29 ); - EXPECT_EQ( m.getElement( 8, 8 ), 30 ); - EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 32 ); - EXPECT_EQ( m.getElement( 9, 1 ), 33 ); - EXPECT_EQ( m.getElement( 9, 2 ), 34 ); - EXPECT_EQ( m.getElement( 9, 3 ), 35 ); - EXPECT_EQ( m.getElement( 9, 4 ), 36 ); - EXPECT_EQ( m.getElement( 9, 5 ), 37 ); - EXPECT_EQ( m.getElement( 9, 6 ), 38 ); - EXPECT_EQ( m.getElement( 9, 7 ), 39 ); - EXPECT_EQ( m.getElement( 9, 8 ), 40 ); - EXPECT_EQ( m.getElement( 9, 9 ), 41 ); + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( 
m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + 
EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 ), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); } @@ -471,16 +449,16 @@ void test_SetElement() /* * Sets up the following 10x10 sparse matrix: * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 
1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / */ const IndexType rows = 10; @@ -640,328 +618,290 @@ void test_SetElement() template< typename Matrix > void test_VectorProduct() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 0 0 0 \ - * | 0 2 0 3 | - * | 0 4 0 0 | - * \ 0 0 5 0 / - */ - - const IndexType m_rows_1 = 4; - const IndexType m_cols_1 = 4; - - Matrix m_1; - m_1.reset(); - m_1.setDimensions( m_rows_1, m_cols_1 ); - typename Matrix::CompressedRowLengthsVector rowLengths_1; - rowLengths_1.setSize( m_rows_1 ); - rowLengths_1.setElement( 0, 1 ); - rowLengths_1.setElement( 1, 2 ); - rowLengths_1.setElement( 2, 1 ); - rowLengths_1.setElement( 3, 1 ); - m_1.setCompressedRowLengths( rowLengths_1 ); - - m_1.setElement( 0, 0, 1 ); // 0th row - - m_1.setElement( 1, 1, 1 ); // 1st row - m_1.setElement( 1, 3, 1 ); - - m_1.setElement( 2, 1, 1 ); // 2nd row - - m_1.setElement( 3, 2, 1 ); // 3rd row - - VectorType inVector_1; - inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) - inVector_1.setElement( i, 2 ); - - VectorType outVector_1; - outVector_1.setSize( m_rows_1 ); - for( IndexType j = 0; j < outVector_1.getSize(); j++ ) - outVector_1.setElement( j, 0 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 1 0 1 | + * | 0 1 0 0 | + * \ 0 0 1 0 / + */ - m_1.vectorProduct( 
inVector_1, outVector_1 ); + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + Matrix m_1( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); - EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); - EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); - EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); - EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + m_1.setElement( 0, 0, 1 ); // 0th row + m_1.setElement( 1, 1, 1 ); // 1st row + m_1.setElement( 1, 3, 1 ); + + m_1.setElement( 2, 1, 1 ); // 2nd row -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * \ 0 8 0 0 / - */ + m_1.setElement( 3, 2, 1 ); // 3rd row - const IndexType m_rows_2 = 4; - const IndexType m_cols_2 = 4; + VectorType inVector_1( m_cols_1 ); + inVector_1 = 2.0; - Matrix m_2; - m_2.reset(); - m_2.setDimensions( m_rows_2, m_cols_2 ); - typename Matrix::CompressedRowLengthsVector rowLengths_2; - rowLengths_2.setSize( m_rows_2 ); - rowLengths_2.setValue( 3 ); - rowLengths_2.setElement( 1, 1 ); - rowLengths_2.setElement( 3, 1 ); - m_2.setCompressedRowLengths( rowLengths_2 ); + VectorType outVector_1( m_rows_1 ); + outVector_1 = 0.0; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_2.setElement( 0, i, 1 ); + m_1.vectorProduct( inVector_1, outVector_1 ); - m_2.setElement( 1, 3, 1 ); // 1st row - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_2.setElement( 2, i, 1 ); + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 4 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 2 ); - for( IndexType i = 1; i < 2; i++ ) // 3rd row - m_2.setElement( 3, i, 1 ); - VectorType inVector_2; - inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i 
< inVector_2.getSize(); i++ ) - inVector_2.setElement( i, 2 ); + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * \ 0 1 0 0 / + */ + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; - VectorType outVector_2; - outVector_2.setSize( m_rows_2 ); - for( IndexType j = 0; j < outVector_2.getSize(); j++ ) - outVector_2.setElement( j, 0 ); + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2; + rowLengths_2.setSize( m_rows_2 ); + rowLengths_2.setValue( 3 ); + rowLengths_2.setElement( 1, 1 ); + rowLengths_2.setElement( 3, 1 ); + m_2.setCompressedRowLengths( rowLengths_2 ); + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, 1 ); - m_2.vectorProduct( inVector_2, outVector_2 ); + m_2.setElement( 1, 3, 1 ); // 1st row + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, 1 ); - EXPECT_EQ( outVector_2.getElement( 0 ), 1 ); - EXPECT_EQ( outVector_2.getElement( 1 ), 1 ); - EXPECT_EQ( outVector_2.getElement( 2 ), 1 ); - EXPECT_EQ( outVector_2.getElement( 3 ), 1 ); + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, 1 ); + VectorType inVector_2( m_cols_2 ); + inVector_2 = 2.0; -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 4 5 6 | - * | 7 8 9 0 | - * \ 0 10 11 12 / - */ + VectorType outVector_2( m_rows_2 ); + outVector_2 = 0.0; - const IndexType m_rows_3 = 4; - const IndexType m_cols_3 = 4; + m_2.vectorProduct( inVector_2, outVector_2 ); - Matrix m_3; - m_3.reset(); - m_3.setDimensions( m_rows_3, m_cols_3 ); - typename Matrix::CompressedRowLengthsVector rowLengths_3; - rowLengths_3.setSize( m_rows_3 ); - rowLengths_3.setValue( 3 ); - m_3.setCompressedRowLengths( rowLengths_3 ); - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_3.setElement( 0, i, 1 ); + EXPECT_EQ( outVector_2.getElement( 0 ), 6 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 2 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 
6 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 2 ); - for( IndexType i = 1; i < 4; i++ ) - m_3.setElement( 1, i, 1 ); // 1st row - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_3.setElement( 2, i, 1 ); + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 1 1 1 | + * | 1 1 1 0 | + * \ 0 1 1 1 / + */ + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; - for( IndexType i = 1; i < 4; i++ ) // 3rd row - m_3.setElement( 3, i, 1 ); + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3; + rowLengths_3.setSize( m_rows_3 ); + rowLengths_3.setValue( 3 ); + m_3.setCompressedRowLengths( rowLengths_3 ); - VectorType inVector_3; - inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) - inVector_3.setElement( i, 2 ); + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, 1 ); - VectorType outVector_3; - outVector_3.setSize( m_rows_3 ); - for( IndexType j = 0; j < outVector_3.getSize(); j++ ) - outVector_3.setElement( j, 0 ); + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, 1 ); // 1st row + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, 1 ); - m_3.vectorProduct( inVector_3, outVector_3 ); + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, 1 ); + VectorType inVector_3( m_cols_3 ); + inVector_3 = 2.0; - EXPECT_EQ( outVector_3.getElement( 0 ), 1 ); - EXPECT_EQ( outVector_3.getElement( 1 ), 1 ); - EXPECT_EQ( outVector_3.getElement( 2 ), 1 ); - EXPECT_EQ( outVector_3.getElement( 3 ), 1 ); + VectorType outVector_3( m_rows_3 ); + outVector_3 = 0.0; + m_3.vectorProduct( inVector_3, outVector_3 ); -/* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 0 4 0 0 \ - * | 0 5 6 7 8 0 0 0 | - * | 9 10 11 12 13 0 0 0 | - * | 0 14 15 16 17 0 0 0 | - * | 0 0 18 19 20 21 0 0 | - * | 0 0 0 22 23 24 25 0 | - * | 26 27 28 29 30 0 0 0 | - * \ 31 32 33 34 35 0 0 0 / - */ - const IndexType m_rows_4 = 8; - 
const IndexType m_cols_4 = 8; + EXPECT_EQ( outVector_3.getElement( 0 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 6 ); - Matrix m_4; - m_4.reset(); - m_4.setDimensions( m_rows_4, m_cols_4 ); - typename Matrix::CompressedRowLengthsVector rowLengths_4; - rowLengths_4.setSize( m_rows_4 ); - rowLengths_4.setValue( 4 ); - rowLengths_4.setElement( 2, 5 ); - rowLengths_4.setElement( 6, 5 ); - rowLengths_4.setElement( 7, 5 ); - m_4.setCompressedRowLengths( rowLengths_4 ); + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 1 1 0 0 1 0 0 \ + * | 0 1 1 1 1 0 0 0 | + * | 1 1 1 1 1 0 0 0 | + * | 0 1 1 1 1 0 0 0 | + * | 0 0 1 1 1 1 0 0 | + * | 0 0 0 1 1 1 1 0 | + * | 1 1 1 1 1 0 0 0 | + * \ 1 1 1 1 1 0 0 0 / + */ + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_4.setElement( 0, i, 1 ); + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4; + rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 ); + m_4.setCompressedRowLengths( rowLengths_4 ); - m_4.setElement( 0, 5, 1 ); + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, 1 ); - for( IndexType i = 1; i < 5; i++ ) // 1st row - m_4.setElement( 1, i, 1 ); + m_4.setElement( 0, 5, 1 ); - for( IndexType i = 0; i < 5; i++ ) // 2nd row - m_4.setElement( 2, i, 1 ); + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, 1 ); - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_4.setElement( 3, i, 1 ); + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, 1 ); - for( IndexType i = 2; i < 6; i++ ) // 4th row - m_4.setElement( 4, i, 1 ); + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, 1 ); - for( IndexType i = 3; i < 7; i++ ) // 5th row - 
m_4.setElement( 5, i, 1 ); + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, 1 ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m_4.setElement( 6, i, 1 ); + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, 1 ); - for( IndexType i = 0; i < 5; i++ ) // 7th row - m_4.setElement( 7, i, 1 ); + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, 1 ); - VectorType inVector_4; - inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) - inVector_4.setElement( i, 2 ); + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, 1 ); - VectorType outVector_4; - outVector_4.setSize( m_rows_4 ); - for( IndexType j = 0; j < outVector_4.getSize(); j++ ) - outVector_4.setElement( j, 0 ); + VectorType inVector_4( m_cols_4 ); + inVector_4 = 2.0; + VectorType outVector_4( m_rows_4 ); + outVector_4 = 0.0; - m_4.vectorProduct( inVector_4, outVector_4 ); + m_4.vectorProduct( inVector_4, outVector_4 ); - EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); - EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); - EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); - EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); - EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); - EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); - EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); - EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + EXPECT_EQ( outVector_4.getElement( 0 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 10 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 10 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 10 ); /* * Sets up the following 8x8 sparse matrix: * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 
24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 + * / 1 1 1 0 1 1 0 1 \ 6 + * | 0 1 0 1 0 0 0 1 | 3 + * | 0 1 1 0 1 0 0 1 | 4 + * | 0 1 1 1 1 0 0 1 | 5 + * | 0 1 0 0 0 0 0 1 | 2 + * | 0 1 1 1 1 1 1 1 | 7 + * | 1 1 1 1 1 1 1 1 | 8 + * \ 1 1 1 1 1 1 1 1 / 8 */ - const IndexType m_rows_5 = 8; - const IndexType m_cols_5 = 8; - - Matrix m_5; - m_5.reset(); - m_5.setDimensions( m_rows_5, m_cols_5 ); - typename Matrix::CompressedRowLengthsVector rowLengths_5; - rowLengths_5.setSize( m_rows_5 ); - rowLengths_5.setElement(0, 6); - rowLengths_5.setElement(1, 3); - rowLengths_5.setElement(2, 4); - rowLengths_5.setElement(3, 5); - rowLengths_5.setElement(4, 2); - rowLengths_5.setElement(5, 7); - rowLengths_5.setElement(6, 8); - rowLengths_5.setElement(7, 8); - m_5.setCompressedRowLengths( rowLengths_5 ); - - RealType value_5 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_5.setElement( 0, i, 1 ); + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5; + rowLengths_5.setSize( m_rows_5 ); + rowLengths_5.setElement(0, 6); + rowLengths_5.setElement(1, 3); + rowLengths_5.setElement(2, 4); + rowLengths_5.setElement(3, 5); + rowLengths_5.setElement(4, 2); + rowLengths_5.setElement(5, 7); + rowLengths_5.setElement(6, 8); + rowLengths_5.setElement(7, 8); + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, 1 ); - m_5.setElement( 0, 4, 1 ); // 0th row - m_5.setElement( 0, 5, 1 ); + m_5.setElement( 0, 4, 1 ); // 0th row + m_5.setElement( 0, 5, 1 ); - m_5.setElement( 1, 1, 1 ); // 1st row - m_5.setElement( 1, 3, 1 ); + m_5.setElement( 1, 1, 1 ); // 1st row + m_5.setElement( 1, 3, 1 ); - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_5.setElement( 2, i, 1 ); + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, 1 ); - m_5.setElement( 2, 4, 1 ); // 2nd row + 
m_5.setElement( 2, 4, 1 ); // 2nd row - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_5.setElement( 3, i, 1 ); + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, 1 ); - m_5.setElement( 4, 1, 1 ); // 4th row + m_5.setElement( 4, 1, 1 ); // 4th row - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_5.setElement( 5, i, 1 ); + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, 1 ); - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_5.setElement( 6, i, 1 ); + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, 1 ); - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_5.setElement( 7, i, 1 ); + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, 1 ); - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m_5.setElement( i, 7, 1); + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); - VectorType inVector_5; - inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) - inVector_5.setElement( i, 2 ); + VectorType inVector_5( m_cols_5 ); + inVector_5 = 2.0; - VectorType outVector_5; - outVector_5.setSize( m_rows_5 ); - for( IndexType j = 0; j < outVector_5.getSize(); j++ ) - outVector_5.setElement( j, 0 ); + VectorType outVector_5( m_rows_5 ); + outVector_5 = 0.0; - m_5.vectorProduct( inVector_5, outVector_5 ); + m_5.vectorProduct( inVector_5, outVector_5 ); - EXPECT_EQ( outVector_5.getElement( 0 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 1 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 2 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 3 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 4 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 5 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 6 ), 1 ); - EXPECT_EQ( outVector_5.getElement( 7 ), 1 ); + EXPECT_EQ( outVector_5.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 6 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 10 ); + 
EXPECT_EQ( outVector_5.getElement( 4 ), 4 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 14 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 16 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 16 ); } template< typename Matrix > @@ -974,21 +914,20 @@ void test_RowsReduction() /* * Sets up the following 8x8 sparse matrix: * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 + * / 1 1 1 0 1 1 0 1 \ 6 + * | 0 1 0 1 0 0 0 1 | 3 + * | 0 1 1 0 1 0 0 1 | 4 + * | 0 1 1 1 1 0 0 1 | 5 + * | 0 1 0 0 0 0 0 1 | 2 + * | 0 1 1 1 1 1 1 1 | 7 + * | 1 1 1 1 1 1 1 1 | 8 + * \ 1 1 1 1 1 1 1 1 / 8 */ const IndexType rows = 8; const IndexType cols = 8; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); typename Matrix::RowsCapacitiesType rowsCapacities( rows ); //rowLengths.setSize( rows ); rowsCapacities.setElement(0, 6); @@ -1065,87 +1004,85 @@ void test_RowsReduction() }; m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); const RealType maxNorm = TNL::max( rowSums ); - EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 + EXPECT_EQ( maxNorm, 8 ) ; // 29+30+31+32+33+34+35+36 } template< typename Matrix > void test_PerformSORIteration() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 4x4 sparse matrix: - * - * / 4 1 0 0 \ - * | 1 4 1 0 | - * | 0 1 4 1 | - * \ 0 0 1 4 / - */ + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 0 0 \ + * | 1 1 1 0 | + * | 0 1 1 1 | + * \ 0 0 1 1 / + */ - const IndexType m_rows = 4; - const IndexType m_cols = 4; + const IndexType m_rows = 4; + const IndexType m_cols = 4; - 
Matrix m; - m.reset(); - m.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); - m.setElement( 0, 0, 4.0 ); // 0th row - m.setElement( 0, 1, 1.0); + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); - m.setElement( 1, 0, 1.0 ); // 1st row - m.setElement( 1, 1, 4.0 ); - m.setElement( 1, 2, 1.0 ); + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); - m.setElement( 2, 1, 1.0 ); // 2nd row - m.setElement( 2, 2, 4.0 ); - m.setElement( 2, 3, 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); - m.setElement( 3, 2, 1.0 ); // 3rd row - m.setElement( 3, 3, 4.0 ); + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); - RealType bVector [ 4 ] = { 1, 1, 1, 1 }; - RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; - IndexType row = 0; - RealType omega = 1; + IndexType row = 0; + RealType omega = 1; - m.performSORIteration( bVector, row++, xVector, omega); + m.performSORIteration( bVector, row++, xVector, omega); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 1.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); - m.performSORIteration( bVector, row++, xVector, omega); + m.performSORIteration( bVector, row++, xVector, omega); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + EXPECT_EQ( xVector[ 0 
], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); - m.performSORIteration( bVector, row++, xVector, omega); + m.performSORIteration( bVector, row++, xVector, omega); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); - m.performSORIteration( bVector, row++, xVector, omega); + m.performSORIteration( bVector, row++, xVector, omega); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 0.25 ); + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); } // This test is only for AdEllpack @@ -1157,256 +1094,256 @@ void test_OperatorEquals() using IndexType = typename Matrix::IndexType; if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) - return; + return; else { - using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; - using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ - - const IndexType m_rows = 8; - const IndexType m_cols = 8; - - AdELL_host m_host; - - m_host.reset(); - m_host.setDimensions( m_rows, m_cols ); - typename AdELL_host::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setElement(0, 6); - rowLengths.setElement(1, 3); - rowLengths.setElement(2, 4); - rowLengths.setElement(3, 5); - rowLengths.setElement(4, 2); - 
rowLengths.setElement(5, 7); - rowLengths.setElement(6, 8); - rowLengths.setElement(7, 8); - m_host.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_host.setElement( 0, i, value++ ); - - m_host.setElement( 0, 4, value++ ); // 0th row - m_host.setElement( 0, 5, value++ ); - - m_host.setElement( 1, 1, value++ ); // 1st row - m_host.setElement( 1, 3, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_host.setElement( 2, i, value++ ); - - m_host.setElement( 2, 4, value++ ); // 2nd row - - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_host.setElement( 3, i, value++ ); - - m_host.setElement( 4, 1, value++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_host.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_host.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_host.setElement( 7, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 - m_host.setElement( i, 7, 1); - - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( 
m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 
36 ); - - AdELL_cuda m_cuda; - - // Copy the host matrix into the cuda matrix - m_cuda = m_host; - - // Reset the host matrix - m_host.reset(); - - // Copy the cuda matrix back into the host matrix - m_host = m_cuda; - - // Check the newly created double-copy host matrix - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - 
EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - - // Try vectorProduct with copied cuda matrix to see if it works correctly. 
- using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; - - VectorType inVector; - inVector.setSize( m_cols ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) - inVector.setElement( i, 2 ); - - VectorType outVector; - outVector.setSize( m_rows ); - for( IndexType j = 0; j < outVector.getSize(); j++ ) - outVector.setElement( j, 0 ); - - m_cuda.vectorProduct( inVector, outVector ); - - EXPECT_EQ( outVector.getElement( 0 ), 32 ); - EXPECT_EQ( outVector.getElement( 1 ), 28 ); - EXPECT_EQ( outVector.getElement( 2 ), 56 ); - EXPECT_EQ( outVector.getElement( 3 ), 102 ); - EXPECT_EQ( outVector.getElement( 4 ), 32 ); - EXPECT_EQ( outVector.getElement( 5 ), 224 ); - EXPECT_EQ( outVector.getElement( 6 ), 352 ); - EXPECT_EQ( outVector.getElement( 7 ), 520 ); + using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; + using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows = 8; + const IndexType m_cols = 8; + + AdELL_host m_host; + + m_host.reset(); + m_host.setDimensions( m_rows, m_cols ); + typename AdELL_host::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setElement(0, 6); + rowLengths.setElement(1, 3); + rowLengths.setElement(2, 4); + rowLengths.setElement(3, 5); + rowLengths.setElement(4, 2); + rowLengths.setElement(5, 7); + rowLengths.setElement(6, 8); + rowLengths.setElement(7, 8); + m_host.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_host.setElement( 0, i, value++ ); + + m_host.setElement( 0, 4, value++ ); // 0th row + m_host.setElement( 0, 5, 
value++ ); + + m_host.setElement( 1, 1, value++ ); // 1st row + m_host.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_host.setElement( 2, i, value++ ); + + m_host.setElement( 2, 4, value++ ); // 2nd row + + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_host.setElement( 3, i, value++ ); + + m_host.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_host.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_host.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_host.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 + m_host.setElement( i, 7, 1); + + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( 
m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + AdELL_cuda m_cuda; + + // Copy the host matrix into the cuda matrix + m_cuda = m_host; + + // Reset the host matrix + m_host.reset(); + + // Copy the cuda matrix back into the host matrix + m_host = m_cuda; + + // Check the newly created double-copy host matrix + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); + 
EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); + EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); + EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); + EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); + EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); + EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); + EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); + EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); + 
EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); + EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); + EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); + EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); + EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); + EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); + EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); + EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); + EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); + EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); + + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); + EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); + EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); + EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); + EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); + EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); + EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); + EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); + + // Try vectorProduct with copied cuda matrix to see if it works correctly. + using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; + + VectorType inVector; + inVector.setSize( m_cols ); + for( IndexType i = 0; i < inVector.getSize(); i++ ) + inVector.setElement( i, 2 ); + + VectorType outVector; + outVector.setSize( m_rows ); + for( IndexType j = 0; j < outVector.getSize(); j++ ) + outVector.setElement( j, 0 ); + + m_cuda.vectorProduct( inVector, outVector ); + + EXPECT_EQ( outVector.getElement( 0 ), 32 ); + EXPECT_EQ( outVector.getElement( 1 ), 28 ); + EXPECT_EQ( outVector.getElement( 2 ), 56 ); + EXPECT_EQ( outVector.getElement( 3 ), 102 ); + EXPECT_EQ( outVector.getElement( 4 ), 32 ); + EXPECT_EQ( outVector.getElement( 5 ), 224 ); + EXPECT_EQ( outVector.getElement( 6 ), 352 ); + EXPECT_EQ( outVector.getElement( 7 ), 520 ); } } @@ -1420,10 +1357,10 @@ void test_SaveAndLoad( const char* filename ) /* * Sets up the following 4x4 sparse matrix: * - * / 1 2 3 0 \ - * | 0 4 0 5 | - * | 6 7 8 0 | - * \ 0 9 10 11 / + * / 1 1 1 0 \ + * | 0 1 0 1 | + * | 1 1 1 0 | + * \ 0 1 1 1 
/ */ const IndexType m_rows = 4; @@ -1484,24 +1421,24 @@ void test_SaveAndLoad( const char* filename ) EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 1 ); EXPECT_EQ( std::remove( filename ), 0 ); } @@ -1516,11 +1453,11 @@ void test_Print() /* * Sets up the following 5x4 sparse matrix: * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * | 0 8 9 10 | - * \ 0 0 11 12 / + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * | 0 1 1 1 | + * \ 0 0 1 1 / */ const IndexType m_rows = 5; diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index b0a9fcb00..b6b5a368f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ 
b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -284,27 +284,6 @@ void test_GetRow() rowLengths.setElement( 9, 10 ); m.setCompressedRowLengths( rowLengths ); - /*RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); - - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); - - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); - - for( IndexType j = 8; j < rows; j++) - { - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); - }*/ auto matrixView = m.getView(); auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { auto row = matrixView.getRow( rowIdx ); -- GitLab From 91b5371c2f1491ee6914b08dcf378335a31bff21 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 4 Feb 2020 10:38:06 +0100 Subject: [PATCH 125/179] All binary sparse matrix unit tests pass well. 
--- src/TNL/Matrices/SparseMatrixView.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 5b043753f..afc21788a 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -454,7 +454,10 @@ forRows( IndexType first, IndexType last, Function& function ) const IndexType paddingIndex_ = this->getPaddingIndex(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { if( isBinary() ) - function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute ); + { + RealType one( columns_view[ globalIdx ] != paddingIndex_ ); + function( rowIdx, localIdx, columns_view[ globalIdx ], one, compute ); + } else function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); }; -- GitLab From 7fd796ef33d91dd538392de1b1f2afe9dfe29a5d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 4 Feb 2020 10:48:44 +0100 Subject: [PATCH 126/179] Deleted useless general unit tests of sparse matrices. 
--- src/UnitTests/Matrices/CMakeLists.txt | 8 ----- src/UnitTests/Matrices/SparseMatrixTest.cpp | 11 ------ src/UnitTests/Matrices/SparseMatrixTest.cu | 11 ------ src/UnitTests/Matrices/SparseMatrixTest.h | 39 --------------------- 4 files changed, 69 deletions(-) delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.cpp delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.cu delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest.h diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 4b12e81a3..d7259fc03 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -1,9 +1,6 @@ ADD_SUBDIRECTORY( Legacy ) IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) @@ -32,10 +29,6 @@ IF( BUILD_CUDA ) TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) @@ -74,7 +67,6 @@ ELSE( BUILD_CUDA ) ENDIF( BUILD_CUDA ) -ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest 
${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cpp b/src/UnitTests/Matrices/SparseMatrixTest.cpp deleted file mode 100644 index 46f6b9bd3..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest.cpp +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - SparseMatrixTest.cpp - description - ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "SparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cu b/src/UnitTests/Matrices/SparseMatrixTest.cu deleted file mode 100644 index 01c23c193..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest.cu +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - SparseMatrixTest.cu - description - ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "SparseMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h deleted file mode 100644 index b08d66c33..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ /dev/null @@ -1,39 +0,0 @@ -/*************************************************************************** - SparseMatrixTest.h - description - ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include - -#include "SparseMatrixTest.hpp" -#include - -#ifdef HAVE_GTEST -#include - -using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >; -using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; - -using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >; -using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; - -TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host ) -{ - //test_PerformSORIteration< CSR_host_float >(); -} - -#ifdef HAVE_CUDA -TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda ) -{ - // test_PerformSORIteration< CSR_cuda_float >(); -} -#endif - -#endif - -#include "../main.h" -- GitLab From 866062ceea3a81771c12f2a8fdb6c2c2194a6f9e Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 4 Feb 2020 12:12:58 +0100 Subject: [PATCH 127/179] Renaming SparseMatrixTest.hpp to SparseMatrixTest.h. 
--- .../Matrices/{SparseMatrixTest.hpp => SparseMatrixTest.h} | 0 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h | 2 +- .../Matrices/SparseMatrixTest_SlicedEllpack_segments.h | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/UnitTests/Matrices/{SparseMatrixTest.hpp => SparseMatrixTest.h} (100%) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.h similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest.hpp rename to src/UnitTests/Matrices/SparseMatrixTest.h diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index 5ac3dde26..1e89d544a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -12,7 +12,7 @@ #include -#include "SparseMatrixTest.hpp" +#include "SparseMatrixTest.h" #include #ifdef HAVE_GTEST diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 48cf9afbf..32678c2b0 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -12,7 +12,7 @@ #include -#include "SparseMatrixTest.hpp" +#include "SparseMatrixTest.h" #include #ifdef HAVE_GTEST diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h index de5356f3a..11365cc5b 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h @@ -13,7 +13,7 @@ #include -#include "SparseMatrixTest.hpp" +#include "SparseMatrixTest.h" #include #ifdef HAVE_GTEST -- GitLab From c5f2624b75c33d649173edac3c34007bcfa7cd84 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 4 Feb 2020 
12:36:01 +0100 Subject: [PATCH 128/179] Renaming sparse matrix unit tests. --- src/UnitTests/Matrices/CMakeLists.txt | 50 +++++++++---------- .../Matrices/SparseMatrixTest_CSR.cpp | 1 + ...R_segments.cpp => SparseMatrixTest_CSR.cu} | 0 ..._CSR_segments.h => SparseMatrixTest_CSR.h} | 0 .../Matrices/SparseMatrixTest_CSR_segments.cu | 1 - .../Matrices/SparseMatrixTest_Ellpack.cpp | 1 + ...gments.cpp => SparseMatrixTest_Ellpack.cu} | 0 ..._segments.h => SparseMatrixTest_Ellpack.h} | 0 .../SparseMatrixTest_Ellpack_segments.cu | 1 - .../SparseMatrixTest_SlicedEllpack.cpp | 1 + ....cpp => SparseMatrixTest_SlicedEllpack.cu} | 0 ...nts.h => SparseMatrixTest_SlicedEllpack.h} | 0 ...SparseMatrixTest_SlicedEllpack_segments.cu | 1 - 13 files changed, 28 insertions(+), 28 deletions(-) create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp rename src/UnitTests/Matrices/{SparseMatrixTest_CSR_segments.cpp => SparseMatrixTest_CSR.cu} (100%) rename src/UnitTests/Matrices/{SparseMatrixTest_CSR_segments.h => SparseMatrixTest_CSR.h} (100%) delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp rename src/UnitTests/Matrices/{SparseMatrixTest_Ellpack_segments.cpp => SparseMatrixTest_Ellpack.cu} (100%) rename src/UnitTests/Matrices/{SparseMatrixTest_Ellpack_segments.h => SparseMatrixTest_Ellpack.h} (100%) delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp rename src/UnitTests/Matrices/{SparseMatrixTest_SlicedEllpack_segments.cpp => SparseMatrixTest_SlicedEllpack.cu} (100%) rename src/UnitTests/Matrices/{SparseMatrixTest_SlicedEllpack_segments.h => SparseMatrixTest_SlicedEllpack.h} (100%) delete mode 100644 src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 
d7259fc03..c4b2fabd3 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -1,9 +1,6 @@ ADD_SUBDIRECTORY( Legacy ) IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -16,23 +13,22 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + 
TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) @@ -49,17 +45,21 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + 
TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack_segments SparseMatrixTest_SlicedEllpack_segments.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -72,9 +72,9 @@ ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECU ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_SlicedEllpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp 
b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp new file mode 100644 index 000000000..258ad2c53 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cpp rename to src/UnitTests/Matrices/SparseMatrixTest_CSR.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h rename to src/UnitTests/Matrices/SparseMatrixTest_CSR.h diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu deleted file mode 100644 index 771c74b9a..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.cu +++ /dev/null @@ -1 +0,0 @@ -#include "SparseMatrixTest_CSR_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp new file mode 100644 index 000000000..c454706f0 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp rename to src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h rename to src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu 
b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu deleted file mode 100644 index 63219e9b0..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu +++ /dev/null @@ -1 +0,0 @@ -#include "SparseMatrixTest_Ellpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp new file mode 100644 index 000000000..40e2e94b8 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cpp rename to src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.h rename to src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu deleted file mode 100644 index a88301100..000000000 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack_segments.cu +++ /dev/null @@ -1 +0,0 @@ -#include "SparseMatrixTest_SlicedEllpack_segments.h" -- GitLab From 4126b7c320369cafc354c02df7c9091de697077a Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 4 Feb 2020 12:48:57 +0100 Subject: [PATCH 129/179] Refactoring binary sparse matrix unit tests. 
--- .../Matrices/BinarySparseMatrixTest.hpp | 1395 +++++++---------- 1 file changed, 551 insertions(+), 844 deletions(-) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp index 3d1775972..8b3d8f833 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -13,11 +13,7 @@ #include #include #include - -// Temporary, until test_OperatorEquals doesn't work for all formats. -#include -#include -#include +#include #ifdef HAVE_GTEST #include @@ -25,92 +21,93 @@ template< typename MatrixHostFloat, typename MatrixHostInt > void host_test_GetType() { - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename MatrixCudaFloat, typename MatrixCudaInt > void cuda_test_GetType() { - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename Matrix > void test_SetDimensions() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 9; - const IndexType cols = 8; + const IndexType rows = 9; + const IndexType cols = 8; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + Matrix m2( rows, cols ); + EXPECT_EQ( m2.getRows(), 9 ); + EXPECT_EQ( m2.getColumns(), 8 ); } template< typename Matrix > void test_SetCompressedRowLengths() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - const IndexType rows = 10; - const IndexType cols = 11; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setValue( 3 ); + const IndexType rows = 10; + const IndexType cols = 11; - IndexType rowLength = 1; - for( IndexType i = 2; i < rows; i++ ) - rowLengths.setElement( i, rowLength++ ); + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); - // Insert values into the rows. 
+ m.setCompressedRowLengths( rowLengths ); - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, 1 ); + // Insert values into the rows. + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, 1 ); - for( IndexType i = 0; i < 3; i++ ) // 1st row - m.setElement( 1, i, 1 ); + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, 1 ); - for( IndexType i = 0; i < 1; i++ ) // 2nd row - m.setElement( 2, i, 1 ); + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); - for( IndexType i = 0; i < 2; i++ ) // 3rd row - m.setElement( 3, i, 1 ); + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, 1 ); - for( IndexType i = 0; i < 3; i++ ) // 4th row - m.setElement( 4, i, 1 ); + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, 1 ); - for( IndexType i = 0; i < 4; i++ ) // 5th row - m.setElement( 5, i, 1 ); + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, 1 ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m.setElement( 6, i, 1 ); + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, 1 ); - for( IndexType i = 0; i < 6; i++ ) // 7th row - m.setElement( 7, i, 1 ); + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, 1 ); - for( IndexType i = 0; i < 7; i++ ) // 8th row - m.setElement( 8, i, 1 ); + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, 1 ); - for( IndexType i = 0; i < 8; i++ ) // 9th row - m.setElement( 9, i, 1 ); + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, 1 ); rowLengths = 0; m.getCompressedRowLengths( rowLengths ); @@ -121,26 +118,20 @@ void test_SetCompressedRowLengths() template< typename Matrix1, typename Matrix2 > void test_SetLike() { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; - - const IndexType rows = 8; - const IndexType cols = 7; - - Matrix1 m1; - m1.reset(); - 
m1.setDimensions( rows + 1, cols + 2 ); + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - Matrix2 m2; - m2.reset(); - m2.setDimensions( rows, cols ); + const IndexType rows = 8; + const IndexType cols = 7; - m1.setLike( m2 ); + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + m1.setLike( m2 ); - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } template< typename Matrix > @@ -168,10 +159,7 @@ void test_GetNumberOfNonzeroMatrixElements() const IndexType rows = 10; const IndexType cols = 10; - Matrix m; - m.reset(); - - m.setDimensions( rows, cols ); + Matrix m( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); @@ -180,9 +168,8 @@ void test_GetNumberOfNonzeroMatrixElements() rowLengths.setElement( 2, 1 ); rowLengths.setElement( 3, 1 ); for( IndexType i = 4; i < rows - 2; i++ ) - { rowLengths.setElement( i, 1 ); - } + rowLengths.setElement( 8, 1 ); rowLengths.setElement( 9, 1 ); m.setCompressedRowLengths( rowLengths ); @@ -203,10 +190,8 @@ void test_GetNumberOfNonzeroMatrixElements() m.setElement( i, 0, 1 ); for( IndexType j = 8; j < rows; j++) - { for( IndexType i = 0; i < cols; i++ ) m.setElement( j, i, 1 ); - } EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); } @@ -214,405 +199,395 @@ void test_GetNumberOfNonzeroMatrixElements() template< typename Matrix > void test_Reset() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 5x4 sparse matrix: - * - * / 0 0 0 0 \ - * | 0 0 0 0 | - * | 0 0 0 0 | - * | 0 0 0 0 | - * \ 0 0 0 0 / - */ - - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using 
DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ - m.reset(); + const IndexType rows = 5; + const IndexType cols = 4; + Matrix m( rows, cols ); + m.reset(); - EXPECT_EQ( m.getRows(), 0 ); - EXPECT_EQ( m.getColumns(), 0 ); + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); } template< typename Matrix > void test_GetRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 1 0 1 0 1 0 0 0 \ - * | 1 1 1 0 0 0 0 0 0 0 | - * | 1 1 1 1 1 1 1 1 0 0 | - * | 1 1 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 1 1 1 1 1 1 1 1 1 | - * \ 1 1 1 1 1 1 1 1 1 1 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); - for( IndexType i = 4; i < rows - 2; i++ ) - { - rowLengths.setElement( i, 1 ); - } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); - - auto matrixView = m.getView(); - auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { - auto row = matrixView.getRow( rowIdx ); - switch( rowIdx ) - { - case 0: - for( IndexType i = 0; i < 4; i++ ) - row.setElement( i, 2 * i, 1 ); - break; - case 1: - for( IndexType i = 0; i < 3; i++ ) - row.setElement( i, i, 1 ); - break; - case 2: - for( IndexType i = 0; i < 8; i++ ) + using RealType = typename Matrix::RealType; + using 
DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + rowLengths.setElement( i, 1 ); + + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + switch( rowIdx ) + { + case 0: + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, 1 ); + break; + case 1: + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, 1 ); + break; + case 2: + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, 1 ); + break; + case 3: + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, 1 ); + break; + case 4: + row.setElement( 0, 0, 1 ); + break; + case 5: + row.setElement( 0, 0, 1 ); + break; + case 6: + row.setElement( 0, 0, 1 ); + break; + case 7: + row.setElement( 0, 0, 1 ); + break; + case 8: + for( IndexType i = 0; i < rows; i++ ) row.setElement( i, i, 1 ); break; - case 3: - for( IndexType i = 0; i < 2; i++ ) + case 9: + for( IndexType i = 0; i < rows; i++ ) row.setElement( i, i, 1 ); break; - case 4: - row.setElement( 0, 0, 1 ); - break; - case 5: - row.setElement( 0, 0, 1 ); - break; - case 6: - row.setElement( 0, 0, 1 ); - break; - 
case 7: - row.setElement( 0, 0, 1 ); - break; - case 8: - for( IndexType i = 0; i < rows; i++ ) - row.setElement( i, i, 1 ); - break; - case 9: - for( IndexType i = 0; i < rows; i++ ) - row.setElement( i, i, 1 ); - break; - } - }; - TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); - - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 1 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 1 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 1 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 1 ); - EXPECT_EQ( m.getElement( 1, 1 ), 1 ); - EXPECT_EQ( m.getElement( 1, 2 ), 1 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 1 ); - EXPECT_EQ( m.getElement( 2, 1 ), 1 ); - EXPECT_EQ( m.getElement( 2, 2 ), 1 ); - EXPECT_EQ( m.getElement( 2, 3 ), 1 ); - EXPECT_EQ( m.getElement( 2, 4 ), 1 ); - EXPECT_EQ( m.getElement( 2, 5 ), 1 ); - EXPECT_EQ( m.getElement( 2, 6 ), 1 ); - EXPECT_EQ( m.getElement( 2, 7 ), 1 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 1 ); - EXPECT_EQ( m.getElement( 3, 1 ), 1 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 1 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 
); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 1 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 1 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 1 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 1 ); - EXPECT_EQ( m.getElement( 8, 1 ), 1 ); - EXPECT_EQ( m.getElement( 8, 2 ), 1 ); - EXPECT_EQ( m.getElement( 8, 3 ), 1 ); - EXPECT_EQ( m.getElement( 8, 4 ), 1 ); - EXPECT_EQ( m.getElement( 8, 5 ), 1 ); - EXPECT_EQ( m.getElement( 8, 6 ), 1 ); - EXPECT_EQ( m.getElement( 8, 7 ), 1 ); - EXPECT_EQ( m.getElement( 8, 8 ), 1 ); - EXPECT_EQ( m.getElement( 8, 9 ), 1 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 1 ); - EXPECT_EQ( m.getElement( 
9, 1 ), 1 ); - EXPECT_EQ( m.getElement( 9, 2 ), 1 ); - EXPECT_EQ( m.getElement( 9, 3 ), 1 ); - EXPECT_EQ( m.getElement( 9, 4 ), 1 ); - EXPECT_EQ( m.getElement( 9, 5 ), 1 ); - EXPECT_EQ( m.getElement( 9, 6 ), 1 ); - EXPECT_EQ( m.getElement( 9, 7 ), 1 ); - EXPECT_EQ( m.getElement( 9, 8 ), 1 ); - EXPECT_EQ( m.getElement( 9, 9 ), 1 ); + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + 
EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 
), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); } template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 1 0 1 0 1 0 0 0 \ - * | 1 1 1 0 0 0 0 0 0 0 | - * | 1 1 1 1 1 1 1 1 0 0 | - * | 1 1 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 0 0 0 0 0 0 0 0 0 | - * | 1 1 1 1 1 1 1 1 1 1 | - * \ 1 1 1 1 1 1 1 1 1 1 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m; - m.reset(); - - m.setDimensions( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); - for( IndexType i = 4; i < rows - 2; i++ ) - { - rowLengths.setElement( i, 1 ); - } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); - - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, 1 ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, 1 ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, 1 ); - - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, 1 ); - - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, 1 ); - - for( IndexType j = 8; j < rows; j++) - { - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, 1 ); - } - - EXPECT_EQ( 
m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 1 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 1 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 1 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 1 ); - EXPECT_EQ( m.getElement( 1, 1 ), 1 ); - EXPECT_EQ( m.getElement( 1, 2 ), 1 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 1 ); - EXPECT_EQ( m.getElement( 2, 1 ), 1 ); - EXPECT_EQ( m.getElement( 2, 2 ), 1 ); - EXPECT_EQ( m.getElement( 2, 3 ), 1 ); - EXPECT_EQ( m.getElement( 2, 4 ), 1 ); - EXPECT_EQ( m.getElement( 2, 5 ), 1 ); - EXPECT_EQ( m.getElement( 2, 6 ), 1 ); - EXPECT_EQ( m.getElement( 2, 7 ), 1 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 1 ); - EXPECT_EQ( m.getElement( 3, 1 ), 1 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 1 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - 
EXPECT_EQ( m.getElement( 5, 0 ), 1 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 1 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 1 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 1 ); - EXPECT_EQ( m.getElement( 8, 1 ), 1 ); - EXPECT_EQ( m.getElement( 8, 2 ), 1 ); - EXPECT_EQ( m.getElement( 8, 3 ), 1 ); - EXPECT_EQ( m.getElement( 8, 4 ), 1 ); - EXPECT_EQ( m.getElement( 8, 5 ), 1 ); - EXPECT_EQ( m.getElement( 8, 6 ), 1 ); - EXPECT_EQ( m.getElement( 8, 7 ), 1 ); - EXPECT_EQ( m.getElement( 8, 8 ), 1 ); - EXPECT_EQ( m.getElement( 8, 9 ), 1 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 1 ); - EXPECT_EQ( m.getElement( 9, 1 ), 1 ); - EXPECT_EQ( m.getElement( 9, 2 ), 1 ); - EXPECT_EQ( m.getElement( 9, 3 ), 1 ); - EXPECT_EQ( m.getElement( 9, 4 ), 1 ); - EXPECT_EQ( m.getElement( 9, 5 ), 1 ); - EXPECT_EQ( m.getElement( 9, 6 ), 1 ); - EXPECT_EQ( m.getElement( 9, 7 ), 1 ); - EXPECT_EQ( m.getElement( 9, 8 ), 1 ); - EXPECT_EQ( m.getElement( 9, 9 
), 1 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + rowLengths.setElement( i, 1 ); + + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, 1 ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, 1 ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, 1 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 
); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 
6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 ), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); } template< typename Matrix > @@ -648,7 +623,7 @@ void test_VectorProduct() m_1.setElement( 1, 1, 1 ); // 1st row m_1.setElement( 1, 3, 1 ); - + m_1.setElement( 2, 1, 1 ); // 2nd row m_1.setElement( 3, 2, 1 ); // 3rd row @@ -690,12 +665,12 @@ void test_VectorProduct() for( IndexType i = 0; i < 3; i++ ) // 0th row m_2.setElement( 0, i, 1 ); - m_2.setElement( 1, 3, 1 ); // 1st row + m_2.setElement( 1, 3, 1 ); // 1st row for( IndexType i = 0; i < 3; i++ ) // 2nd row m_2.setElement( 2, i, 1 ); - for( IndexType i = 1; i < 2; i++ ) // 3rd 
row + for( IndexType i = 1; i < 2; i++ ) // 3rd row m_2.setElement( 3, i, 1 ); VectorType inVector_2( m_cols_2 ); @@ -706,13 +681,11 @@ void test_VectorProduct() m_2.vectorProduct( inVector_2, outVector_2 ); - EXPECT_EQ( outVector_2.getElement( 0 ), 6 ); EXPECT_EQ( outVector_2.getElement( 1 ), 2 ); EXPECT_EQ( outVector_2.getElement( 2 ), 6 ); EXPECT_EQ( outVector_2.getElement( 3 ), 2 ); - /* * Sets up the following 4x4 sparse matrix: * @@ -943,32 +916,32 @@ void test_RowsReduction() for( IndexType i = 0; i < 3; i++ ) // 0th row m.setElement( 0, i, 1 ); - m.setElement( 0, 4, 1 ); // 0th row + m.setElement( 0, 4, 1 ); // 0th row m.setElement( 0, 5, 1 ); - m.setElement( 1, 1, 1 ); // 1st row + m.setElement( 1, 1, 1 ); // 1st row m.setElement( 1, 3, 1 ); - for( IndexType i = 1; i < 3; i++ ) // 2nd row + for( IndexType i = 1; i < 3; i++ ) // 2nd row m.setElement( 2, i, 1 ); - m.setElement( 2, 4, 1 ); // 2nd row + m.setElement( 2, 4, 1 ); // 2nd row - for( IndexType i = 1; i < 5; i++ ) // 3rd row + for( IndexType i = 1; i < 5; i++ ) // 3rd row m.setElement( 3, i, 1 ); - m.setElement( 4, 1, 1 ); // 4th row + m.setElement( 4, 1, 1 ); // 4th row - for( IndexType i = 1; i < 7; i++ ) // 5th row + for( IndexType i = 1; i < 7; i++ ) // 5th row m.setElement( 5, i, 1 ); - for( IndexType i = 0; i < 7; i++ ) // 6th row + for( IndexType i = 0; i < 7; i++ ) // 6th row m.setElement( 6, i, 1 ); - for( IndexType i = 0; i < 8; i++ ) // 7th row + for( IndexType i = 0; i < 8; i++ ) // 7th row m.setElement( 7, i, 1 ); - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows m.setElement( i, 7, 1); //// @@ -1085,268 +1058,6 @@ void test_PerformSORIteration() EXPECT_EQ( xVector[ 3 ], 0.25 ); } -// This test is only for AdEllpack -template< typename Matrix > -void test_OperatorEquals() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename 
Matrix::IndexType; - - if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) - return; - else - { - using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; - using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ - - const IndexType m_rows = 8; - const IndexType m_cols = 8; - - AdELL_host m_host; - - m_host.reset(); - m_host.setDimensions( m_rows, m_cols ); - typename AdELL_host::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setElement(0, 6); - rowLengths.setElement(1, 3); - rowLengths.setElement(2, 4); - rowLengths.setElement(3, 5); - rowLengths.setElement(4, 2); - rowLengths.setElement(5, 7); - rowLengths.setElement(6, 8); - rowLengths.setElement(7, 8); - m_host.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_host.setElement( 0, i, value++ ); - - m_host.setElement( 0, 4, value++ ); // 0th row - m_host.setElement( 0, 5, value++ ); - - m_host.setElement( 1, 1, value++ ); // 1st row - m_host.setElement( 1, 3, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_host.setElement( 2, i, value++ ); - - m_host.setElement( 2, 4, value++ ); // 2nd row - - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_host.setElement( 3, i, value++ ); - - m_host.setElement( 4, 1, value++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_host.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_host.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_host.setElement( 7, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at 
the end or rows: 5, 6 - m_host.setElement( i, 7, 1); - - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 
), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - - AdELL_cuda m_cuda; - - // Copy the host matrix into the cuda matrix - m_cuda = m_host; - - // Reset the host matrix - m_host.reset(); - - // Copy the cuda matrix back into the host matrix - m_host = m_cuda; - - // Check the newly created double-copy host matrix - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 
2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( 
m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - - // Try vectorProduct with copied cuda matrix to see if it works correctly. - using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; - - VectorType inVector; - inVector.setSize( m_cols ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) - inVector.setElement( i, 2 ); - - VectorType outVector; - outVector.setSize( m_rows ); - for( IndexType j = 0; j < outVector.getSize(); j++ ) - outVector.setElement( j, 0 ); - - m_cuda.vectorProduct( inVector, outVector ); - - EXPECT_EQ( outVector.getElement( 0 ), 32 ); - EXPECT_EQ( outVector.getElement( 1 ), 28 ); - EXPECT_EQ( outVector.getElement( 2 ), 56 ); - EXPECT_EQ( outVector.getElement( 3 ), 102 ); - EXPECT_EQ( outVector.getElement( 4 ), 32 ); - EXPECT_EQ( outVector.getElement( 5 ), 224 ); - EXPECT_EQ( outVector.getElement( 6 ), 352 ); - EXPECT_EQ( outVector.getElement( 7 ), 520 ); - } -} - template< typename Matrix > void test_SaveAndLoad( const char* filename ) { @@ -1363,148 +1074,144 @@ void test_SaveAndLoad( const char* filename ) * \ 0 1 1 1 / */ - const IndexType m_rows = 4; - const IndexType m_cols = 4; + const IndexType m_rows = 4; + const IndexType m_cols = 4; - Matrix savedMatrix; - savedMatrix.reset(); - savedMatrix.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - savedMatrix.setCompressedRowLengths( rowLengths ); + Matrix savedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + savedMatrix.setCompressedRowLengths( rowLengths ); - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - savedMatrix.setElement( 0, i, 1 ); + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, 1 
); - savedMatrix.setElement( 1, 1, 1 ); - savedMatrix.setElement( 1, 3, 1 ); // 1st row + savedMatrix.setElement( 1, 1, 1 ); + savedMatrix.setElement( 1, 3, 1 ); // 1st row - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - savedMatrix.setElement( 2, i, 1 ); + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, 1 ); - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - savedMatrix.setElement( 3, i, 1 ); + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, 1 ); - ASSERT_NO_THROW( savedMatrix.save( filename ) ); + ASSERT_NO_THROW( savedMatrix.save( filename ) ); - Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2; - rowLengths2.setSize( m_rows ); - rowLengths2.setValue( 3 ); - loadedMatrix.setCompressedRowLengths( rowLengths2 ); + Matrix loadedMatrix; + loadedMatrix.reset(); + loadedMatrix.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths2; + rowLengths2.setSize( m_rows ); + rowLengths2.setValue( 3 ); + loadedMatrix.setCompressedRowLengths( rowLengths2 ); - ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 
1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 1 ); 
- EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 1 ); - EXPECT_EQ( std::remove( filename ), 0 ); + EXPECT_EQ( std::remove( filename ), 0 ); } template< typename Matrix > void test_Print() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 5x4 sparse matrix: - * - * / 1 1 1 0 \ - * | 0 0 0 1 | - * | 1 1 1 0 | - * | 0 1 1 1 | - * \ 0 0 1 1 / - */ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename 
Matrix::IndexType; - const IndexType m_rows = 5; - const IndexType m_cols = 4; + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * | 0 1 1 1 | + * \ 0 0 1 1 / + */ - Matrix m; - m.reset(); - m.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); + const IndexType m_rows = 5; + const IndexType m_cols = 4; - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, 1 ); + Matrix m; + m.reset(); + m.setDimensions( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); - m.setElement( 1, 3, 1 ); // 1st row + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, 1 ); - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, 1 ); + m.setElement( 1, 3, 1 ); // 1st row - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, 1 ); + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); - for( IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, 1 ); + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, 1 ); - #include - std::stringstream printed; - std::stringstream couted; + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, 1 ); - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + std::stringstream printed; + std::stringstream couted; - m.print( std::cout ); //all the std::cout goes to ss + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); - std::cout.rdbuf(old_buf); //reset + m.print( std::cout ); //all the std::cout goes to ss - couted << 
"Row: 0 -> Col:0->1 Col:1->1 Col:2->1\t\n" - "Row: 1 -> Col:3->1\t\n" - "Row: 2 -> Col:0->1 Col:1->1 Col:2->1\t\n" - "Row: 3 -> Col:1->1 Col:2->1 Col:3->1\t\n" - "Row: 4 -> Col:2->1 Col:3->1\t\n"; + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 1 -> Col:3->1\t\n" + "Row: 2 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 3 -> Col:1->1 Col:2->1 Col:3->1\t\n" + "Row: 4 -> Col:2->1 Col:3->1\t\n"; - EXPECT_EQ( printed.str(), couted.str() ); + EXPECT_EQ( printed.str(), couted.str() ); } #endif -- GitLab From 9ee9e37674d0a4ff7d341f97b7c569b206a55569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 4 Feb 2020 21:23:45 +0100 Subject: [PATCH 130/179] Added binary sparse matrix unit tests for Ellpack and SlicedEllpack. --- ...Test.cu => BinarySparseMatrixTest_CSR.cpp} | 4 +- ...Test.cpp => BinarySparseMatrixTest_CSR.cu} | 4 +- ...rixTest.h => BinarySparseMatrixTest_CSR.h} | 28 ++-- .../BinarySparseMatrixTest_Ellpack.cpp | 11 ++ .../BinarySparseMatrixTest_Ellpack.cu | 11 ++ .../Matrices/BinarySparseMatrixTest_Ellpack.h | 146 ++++++++++++++++++ .../BinarySparseMatrixTest_SlicedEllpack.cpp | 11 ++ .../BinarySparseMatrixTest_SlicedEllpack.cu | 11 ++ .../BinarySparseMatrixTest_SlicedEllpack.h | 146 ++++++++++++++++++ src/UnitTests/Matrices/CMakeLists.txt | 28 +++- .../Matrices/SparseMatrixTest_CSR.cu | 2 +- src/UnitTests/Matrices/SparseMatrixTest_CSR.h | 2 +- .../Matrices/SparseMatrixTest_Ellpack.cu | 2 +- .../Matrices/SparseMatrixTest_Ellpack.h | 2 +- .../SparseMatrixTest_SlicedEllpack.cu | 2 +- 15 files changed, 381 insertions(+), 29 deletions(-) rename src/UnitTests/Matrices/{BinarySparseMatrixTest.cu => BinarySparseMatrixTest_CSR.cpp} (78%) rename src/UnitTests/Matrices/{BinarySparseMatrixTest.cpp => BinarySparseMatrixTest_CSR.cu} (78%) rename src/UnitTests/Matrices/{BinarySparseMatrixTest.h => BinarySparseMatrixTest_CSR.h} (87%) create mode 100644 
src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp similarity index 78% rename from src/UnitTests/Matrices/BinarySparseMatrixTest.cu rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp index 916f14360..b1f489105 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.cu +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp @@ -1,5 +1,5 @@ /*************************************************************************** - BinarySparseMatrixTest.cu - description + BinarySparseMatrixTest_CSR.cpp - description ------------------- begin : Jan 30, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. 
@@ -8,4 +8,4 @@ /* See Copyright Notice in tnl/Copyright */ -#include "BinarySparseMatrixTest.h" \ No newline at end of file +#include "BinarySparseMatrixTest_CSR.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu similarity index 78% rename from src/UnitTests/Matrices/BinarySparseMatrixTest.cpp rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu index ea7b8d3c9..496bdde1b 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.cpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu @@ -1,5 +1,5 @@ /*************************************************************************** - BinarySparseMatrixTest.cpp - description + BinarySparseMatrixTest_CSR.cu - description ------------------- begin : Jan 30, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. @@ -8,4 +8,4 @@ /* See Copyright Notice in tnl/Copyright */ -#include "BinarySparseMatrixTest.h" \ No newline at end of file +#include "BinarySparseMatrixTest_CSR.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h similarity index 87% rename from src/UnitTests/Matrices/BinarySparseMatrixTest.h rename to src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h index cb0d0bab5..9cd52741a 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -1,5 +1,5 @@ /*************************************************************************** - BinarySparseMatrixTest.h - description + BinarySparseMatrixTest_CSR.h - description ------------------- begin : Jan 30, 2020 copyright : (C) 2020 by Tomas Oberhuber et al. 
@@ -20,7 +20,7 @@ // test fixture for typed tests template< typename Matrix > -class CSRMatrixTest : public ::testing::Test +class BinaryMatrixTest_CSR : public ::testing::Test { protected: using CSRMatrixType = Matrix; @@ -57,37 +57,37 @@ using CSRMatrixTypes = ::testing::Types #endif >; -TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); +TYPED_TEST_SUITE( BinaryMatrixTest_CSR, CSRMatrixTypes); -TYPED_TEST( CSRMatrixTest, setDimensionsTest ) +TYPED_TEST( BinaryMatrixTest_CSR, setDimensionsTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_SetDimensions< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +TYPED_TEST( BinaryMatrixTest_CSR, setCompressedRowLengthsTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_SetCompressedRowLengths< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setLikeTest ) +TYPED_TEST( BinaryMatrixTest_CSR, setLikeTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_SetLike< CSRMatrixType, CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, resetTest ) +TYPED_TEST( BinaryMatrixTest_CSR, resetTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_Reset< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, getRowTest ) +TYPED_TEST( BinaryMatrixTest_CSR, getRowTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -95,35 +95,35 @@ TYPED_TEST( CSRMatrixTest, getRowTest ) } -TYPED_TEST( CSRMatrixTest, setElementTest ) +TYPED_TEST( BinaryMatrixTest_CSR, setElementTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_SetElement< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, vectorProductTest ) +TYPED_TEST( BinaryMatrixTest_CSR, vectorProductTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; test_VectorProduct< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, rowsReduction ) +TYPED_TEST( BinaryMatrixTest_CSR, rowsReduction ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; 
test_RowsReduction< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest" ); + test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" ); } -TYPED_TEST( CSRMatrixTest, printTest ) +TYPED_TEST( BinaryMatrixTest_CSR, printTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp new file mode 100644 index 000000000..b1d5d71cf --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_Ellpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu new file mode 100644 index 000000000..8d075f1cf --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_Ellpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h new file mode 100644 index 000000000..708bd85f0 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -0,0 +1,146 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "BinarySparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class BinaryMatrixTest_Ellpack : public ::testing::Test +{ +protected: + using EllpackMatrixType = Matrix; +}; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAlocator > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; + +// types for which MatrixTest is instantiated +using EllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, 
TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, 
TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack > +#endif +>; + +TYPED_TEST_SUITE( BinaryMatrixTest_Ellpack, EllpackMatrixTypes); + +TYPED_TEST( BinaryMatrixTest_Ellpack, setDimensionsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetDimensions< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, setCompressedRowLengthsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetCompressedRowLengths< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, setLikeTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetLike< EllpackMatrixType, EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, resetTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Reset< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, getRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_GetRow< EllpackMatrixType >(); +} + + +TYPED_TEST( BinaryMatrixTest_Ellpack, setElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetElement< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, vectorProductTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_VectorProduct< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, rowsReduction ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + 
test_RowsReduction< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" ); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, printTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Print< EllpackMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp new file mode 100644 index 000000000..7046d8156 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_SlicedEllpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu new file mode 100644 index 000000000..bb6829310 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_SlicedEllpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h new file mode 100644 index 000000000..7ebc25968 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -0,0 +1,146 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include + + +#include "BinarySparseMatrixTest.hpp" +#include + +#ifdef HAVE_GTEST +#include + +// test fixture for typed tests +template< typename Matrix > +class BinaryMatrixTest_SlicedEllpack : public ::testing::Test +{ +protected: + using SlicedEllpackMatrixType = Matrix; +}; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; + +// types for which MatrixTest is instantiated +using SlicedEllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, 
TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + 
TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack > +#endif +>; + +TYPED_TEST_SUITE( BinaryMatrixTest_SlicedEllpack, SlicedEllpackMatrixTypes); + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setDimensionsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetDimensions< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setCompressedRowLengthsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setLikeTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, resetTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Reset< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, getRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_GetRow< SlicedEllpackMatrixType >(); +} + + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setElementTest ) +{ + using SlicedEllpackMatrixType = typename 
TestFixture::SlicedEllpackMatrixType; + + test_SetElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, vectorProductTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_VectorProduct< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, rowsReduction ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_RowsReduction< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" ); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Print< SlicedEllpackMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index c4b2fabd3..60a01eaf5 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -25,8 +25,14 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack 
BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) ELSE( BUILD_CUDA ) ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) @@ -61,9 +67,17 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( BinarySparseMatrixTest BinarySparseMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( BinarySparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) @@ -75,7 +89,9 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( BinarySparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( 
BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu index 771c74b9a..258ad2c53 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu @@ -1 +1 @@ -#include "SparseMatrixTest_CSR_segments.h" +#include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index 1e89d544a..781735e7f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_CSR_segments.h - description + SparseMatrixTest_CSR.h - description ------------------- begin : Dec 2, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu index 63219e9b0..c454706f0 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu @@ -1 +1 @@ -#include "SparseMatrixTest_Ellpack_segments.h" +#include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 32678c2b0..9650105f6 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_Ellpack_segments.h - description + SparseMatrixTest_Ellpack.h - description ------------------- begin : Dec 3, 2019 copyright : (C) 2019 by Tomas Oberhuber et al. diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu index a88301100..40e2e94b8 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu @@ -1 +1 @@ -#include "SparseMatrixTest_SlicedEllpack_segments.h" +#include "SparseMatrixTest_SlicedEllpack.h" -- GitLab From f0b9843967c5808feb13de2e83124d81f7c92c3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Wed, 5 Feb 2020 20:03:19 +0100 Subject: [PATCH 131/179] Added binary sparse matrix copy test. 
--- src/TNL/Matrices/SparseMatrixView.hpp | 3 +- .../Matrices/BinarySparseMatrixCopyTest.cpp | 11 + .../Matrices/BinarySparseMatrixCopyTest.cu | 11 + .../Matrices/BinarySparseMatrixCopyTest.h | 820 ++++++++++++++++++ src/UnitTests/Matrices/CMakeLists.txt | 10 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 7 - 6 files changed, 853 insertions(+), 9 deletions(-) create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu create mode 100644 src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index afc21788a..16a8bc62f 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -263,7 +263,8 @@ addElement( const IndexType row, } this->columnIndexes.setElement( globalIdx, column ); - this->values.setElement( globalIdx, value ); + if( ! isBinary() ) + this->values.setElement( globalIdx, value ); return; } } diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp new file mode 100644 index 000000000..51d7c4ea9 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixCopyTest.cpp - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu new file mode 100644 index 000000000..f29db9e96 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixCopyTest.cu - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h new file mode 100644 index 000000000..d8fefeed7 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -0,0 +1,820 @@ +/*************************************************************************** + BinarySparseMatrixCopyTest.h - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >; + + +#ifdef HAVE_GTEST +#include + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 1 1 | + * | 1 1 1 1 1 | + * | 1 1 | + * | 1 | + * | 1 | + * | 1 1 1 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + 
rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); + rowLengths.setElement( 7, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, 1 ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + m.setElement( 6, 0, 1 ); // 6th row + + m.setElement( 7, 0, 1 ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 8th row + m.setElement( 8, i, 1 ); + + m.setElement( 9, 5, 1 ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 1 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 1 ); + EXPECT_EQ( m.getElement( 3, 5 ), 1 ); + + EXPECT_EQ( 
m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 1 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i 
< rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, 1 ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 1 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 1 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the 
following 7x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, 1 ); +} + +template< typename Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 1 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 
5 ), 1 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 1 ); + EXPECT_EQ( m.getElement( 5, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 1 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix1, typename Matrix2 > +void testConversion() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + 
setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) + hostMatrix.setElement( i, j, 1 ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + } +#endif +} + +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename 
Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( j - i ) ) + hostMatrix.setElement( i, j, 1 ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + /*std::cerr << "hostMatrix " << hostMatrix << std::endl; + std::cerr << "matrix " << matrix << std::endl; + std::cerr << "rowCapacities " << rowCapacities << std::endl;*/ + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = 
typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + } +#endif +} + +TEST( BinarySparseMatrixCopyTest, CSR_HostToHost ) +{ + testCopyAssignment< CSR_host, CSR_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, CSR_HostToCuda ) +{ + testCopyAssignment< CSR_host, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_CudaToHost ) +{ + testCopyAssignment< CSR_cuda, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_CudaToCuda ) +{ + testCopyAssignment< CSR_cuda, CSR_cuda >(); +} +#endif + + +TEST( BinarySparseMatrixCopyTest, Ellpack_HostToHost ) +{ + testCopyAssignment< E_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( 
BinarySparseMatrixCopyTest, Ellpack_HostToCuda ) +{ + testCopyAssignment< E_host, E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToHost ) +{ + testCopyAssignment< E_cuda, E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToCuda ) +{ + testCopyAssignment< E_cuda, E_cuda >(); +} +#endif + + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToHost ) +{ + testCopyAssignment< SE_host, SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToCuda ) +{ + testCopyAssignment< SE_host, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToHost ) +{ + testCopyAssignment< SE_cuda, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) +{ + testCopyAssignment< SE_cuda, SE_cuda >(); +} +#endif + +//// +// Test of conversion between formats +TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_host ) +{ + testConversion< CSR_host, E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_host ) +{ + testConversion< E_host, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_host ) +{ + testConversion< CSR_host, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_host ) +{ + testConversion< SE_host, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host ) +{ + testConversion< E_host, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host ) +{ + testConversion< SE_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_cuda ) +{ + testConversion< CSR_cuda, E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_cuda ) +{ + testConversion< E_cuda, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda ) +{ + testConversion< CSR_cuda, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda ) +{ + testConversion< SE_cuda, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, 
Ellpack_to_SlicedEllpack_cuda ) +{ + testConversion< E_cuda, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) +{ + testConversion< SE_cuda, E_cuda >(); +} +#endif + +//// +// Tridiagonal matrix assignment test +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host ) +{ + tridiagonalMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host ) +{ + tridiagonalMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + tridiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda ) +{ + tridiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + tridiagonalMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + tridiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +//// +// Multidiagonal matrix assignment test +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host ) +{ + multidiagonalMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host ) +{ + multidiagonalMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + multidiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda ) +{ + multidiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + multidiagonalMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + multidiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + 
+//// +// Dense matrix assignment test +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) +{ + denseMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host ) +{ + denseMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host ) +{ + denseMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda ) +{ + denseMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda ) +{ + denseMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda ) +{ + denseMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST + +#include "../main.h" diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 60a01eaf5..8da67ef6b 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -34,6 +34,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -79,9 +82,12 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE 
${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) -ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -89,9 +95,11 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 053f1e9fb..829c30677 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -21,13 +21,6 @@ #include #include -/*using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; -using 
CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; -using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; -using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; -using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; -using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >;*/ - template< typename Device, typename Index, typename IndexAllocator > using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; -- GitLab From 3b631afcbb877516e42a9650b59470c368ea8365 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 6 Feb 2020 12:30:19 +0100 Subject: [PATCH 132/179] Fixed --- src/TNL/Matrices/SparseMatrix.hpp | 6 ++++-- .../Matrices/BinarySparseMatrixCopyTest.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 60f4695f0..992443434 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -590,7 +590,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ ); columns_view[ thisGlobalIdx ] = columnIdx; - values_view[ thisGlobalIdx ] = value; + if( ! isBinary() ) + values_view[ thisGlobalIdx ] = value; } }; matrix.forAllRows( f ); @@ -700,7 +701,8 @@ operator=( const RHSMatrix& matrix ) { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); columns_view[ thisGlobalIdx ] = columnIndex; - values_view[ thisGlobalIdx ] = value; + if( ! 
isBinary() ) + values_view[ thisGlobalIdx ] = value; rowLocalIndexes_view[ rowIdx ] = localIdx; } }; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h index d8fefeed7..b901acbbd 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -440,7 +440,7 @@ void tridiagonalMatrixAssignment() TridiagonalHost hostMatrix( rows, columns ); for( IndexType i = 0; i < rows; i++ ) for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) - hostMatrix.setElement( i, j, 1 ); + hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) ); Matrix matrix; matrix = hostMatrix; @@ -456,7 +456,7 @@ void tridiagonalMatrixAssignment() if( abs( i - j ) > 1 ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else - EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); } #ifdef HAVE_CUDA @@ -471,7 +471,7 @@ void tridiagonalMatrixAssignment() if( abs( i - j ) > 1 ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else - EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); } #endif } @@ -493,7 +493,7 @@ void multidiagonalMatrixAssignment() for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < columns; j++ ) if( diagonals.containsValue( j - i ) ) - hostMatrix.setElement( i, j, 1 ); + hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) ); Matrix matrix; matrix = hostMatrix; @@ -510,7 +510,7 @@ void multidiagonalMatrixAssignment() for( IndexType j = 0; j < columns; j++ ) { if( diagonals.containsValue( j - i ) ) - EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } @@ -525,7 +525,7 @@ void multidiagonalMatrixAssignment() for( IndexType j = 0; j < columns; j++ ) { if( diagonals.containsValue( j - i ) ) - EXPECT_EQ( matrix.getElement( i, 
j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); else EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); } @@ -546,7 +546,7 @@ void denseMatrixAssignment() DenseHost hostMatrix( rows, columns ); for( IndexType i = 0; i < columns; i++ ) for( IndexType j = 0; j <= i; j++ ) - hostMatrix( i, j ) = i + j; + hostMatrix( i, j ) = TNL::min( i + j, 1 ); Matrix matrix; matrix = hostMatrix; @@ -561,7 +561,7 @@ void denseMatrixAssignment() if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else - EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); } #ifdef HAVE_CUDA @@ -576,7 +576,7 @@ void denseMatrixAssignment() if( j > i ) EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); else - EXPECT_EQ( matrix.getElement( i, j ), 1.0 ); + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); } #endif } -- GitLab From 1ac668ecd333fe990072b2a5bdf2dd71d80df4c8 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 7 Feb 2020 13:25:47 +0100 Subject: [PATCH 133/179] Refactoring SparseMatrixTest. --- src/UnitTests/Matrices/SparseMatrixTest.h | 2240 +++++++++------------ 1 file changed, 925 insertions(+), 1315 deletions(-) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index b6b5a368f..04a9b065f 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -1,5 +1,5 @@ /*************************************************************************** - SparseMatrixTest_impl.h - description + SparseMatrixTest.h - description ------------------- begin : Nov 22, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. @@ -13,11 +13,7 @@ #include #include #include - -// Temporary, until test_OperatorEquals doesn't work for all formats. 
-#include -#include -#include +#include #ifdef HAVE_GTEST #include @@ -25,93 +21,91 @@ template< typename MatrixHostFloat, typename MatrixHostInt > void host_test_GetType() { - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename MatrixCudaFloat, typename MatrixCudaInt > void cuda_test_GetType() { - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename Matrix > void test_SetDimensions() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 9; - const IndexType cols = 8; + const IndexType rows = 9; + const IndexType cols = 8; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m; + m.setDimensions( rows, cols ); - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); } template< typename Matrix > void test_SetCompressedRowLengths() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; 
+ using IndexType = typename Matrix::IndexType; - const IndexType rows = 10; - const IndexType cols = 11; + const IndexType rows = 10; + const IndexType cols = 11; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setValue( 3 ); + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths = 3; - IndexType rowLength = 1; - for( IndexType i = 2; i < rows; i++ ) - rowLengths.setElement( i, rowLength++ ); + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); - m.setCompressedRowLengths( rowLengths ); + m.setCompressedRowLengths( rowLengths ); - // Insert values into the rows. - RealType value = 1; + // Insert values into the rows. + RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 1st row - m.setElement( 1, i, value++ ); + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); - for( IndexType i = 0; i < 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); - for( IndexType i = 0; i < 2; i++ ) // 3rd row - m.setElement( 3, i, value++ ); + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); - for( IndexType i = 0; i < 3; i++ ) // 4th row - m.setElement( 4, i, value++ ); + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); - for( IndexType i = 0; i < 4; i++ ) // 5th row - m.setElement( 5, i, value++ ); + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m.setElement( 6, i, value++ ); + for( IndexType i = 0; i < 5; i++ ) // 6th row + 
m.setElement( 6, i, value++ ); - for( IndexType i = 0; i < 6; i++ ) // 7th row - m.setElement( 7, i, value++ ); + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); - for( IndexType i = 0; i < 7; i++ ) // 8th row - m.setElement( 8, i, value++ ); + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); - for( IndexType i = 0; i < 8; i++ ) // 9th row - m.setElement( 9, i, value++ ); + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); rowLengths = 0; m.getCompressedRowLengths( rowLengths ); @@ -122,26 +116,20 @@ void test_SetCompressedRowLengths() template< typename Matrix1, typename Matrix2 > void test_SetLike() { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; - - const IndexType rows = 8; - const IndexType cols = 7; - - Matrix1 m1; - m1.reset(); - m1.setDimensions( rows + 1, cols + 2 ); + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - Matrix2 m2; - m2.reset(); - m2.setDimensions( rows, cols ); + const IndexType rows = 8; + const IndexType cols = 7; - m1.setLike( m2 ); + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + m1.setLike( m2 ); - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } template< typename Matrix > @@ -169,23 +157,9 @@ void test_GetNumberOfNonzeroMatrixElements() const IndexType rows = 10; const IndexType cols = 10; - Matrix m; - m.reset(); + Matrix m( rows, cols ); - m.setDimensions( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); - for( 
IndexType i = 4; i < rows - 2; i++ ) - { - rowLengths.setElement( i, 1 ); - } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; m.setCompressedRowLengths( rowLengths ); RealType value = 1; @@ -205,10 +179,8 @@ void test_GetNumberOfNonzeroMatrixElements() m.setElement( i, 0, value++ ); for( IndexType j = 8; j < rows; j++) - { for( IndexType i = 0; i < cols; i++ ) m.setElement( j, i, value++ ); - } EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); } @@ -216,81 +188,67 @@ void test_GetNumberOfNonzeroMatrixElements() template< typename Matrix > void test_Reset() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 5x4 sparse matrix: - * - * / 0 0 0 0 \ - * | 0 0 0 0 | - * | 0 0 0 0 | - * | 0 0 0 0 | - * \ 0 0 0 0 / - */ - - const IndexType rows = 5; - const IndexType cols = 4; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ - m.reset(); + const IndexType rows = 5; + const IndexType cols = 4; + Matrix m( rows, cols ); + m.reset(); - EXPECT_EQ( m.getRows(), 0 ); - EXPECT_EQ( m.getColumns(), 0 ); + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); } template< typename Matrix > void test_GetRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 
0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); - for( IndexType i = 4; i < rows - 2; i++ ) - { - rowLengths.setElement( i, 1 ); - } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); - - auto matrixView = m.getView(); - auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { - auto row = matrixView.getRow( rowIdx ); - RealType val; - switch( rowIdx ) - { - case 0: + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: val = 1; for( IndexType i = 0; i < 4; i++ ) row.setElement( i, 2 * i, val++ ); @@ -323,716 +281,670 @@ void test_GetRow() row.setElement( 0, 0, 21 ); break; case 8: - val = 22; - for( IndexType i = 
0; i < rows; i++ ) - row.setElement( i, i, val++ ); - break; + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; case 9: - val = 32; - for( IndexType i = 0; i < rows; i++ ) - row.setElement( i, i, val++ ); - break; - } - }; - TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); - - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 2 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 3 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 4 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 5 ); - EXPECT_EQ( m.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m.getElement( 1, 2 ), 7 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 8 ); - EXPECT_EQ( m.getElement( 2, 1 ), 9 ); - EXPECT_EQ( m.getElement( 2, 2 ), 10 ); - EXPECT_EQ( m.getElement( 2, 3 ), 11 ); - EXPECT_EQ( m.getElement( 2, 4 ), 12 ); - EXPECT_EQ( m.getElement( 2, 5 ), 13 ); - EXPECT_EQ( m.getElement( 2, 6 ), 14 ); - EXPECT_EQ( m.getElement( 2, 7 ), 15 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 
18 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 19 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 20 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 21 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 22 ); - EXPECT_EQ( m.getElement( 8, 1 ), 23 ); - EXPECT_EQ( m.getElement( 8, 2 ), 24 ); - EXPECT_EQ( m.getElement( 8, 3 ), 25 ); - EXPECT_EQ( m.getElement( 8, 4 ), 26 ); - EXPECT_EQ( m.getElement( 8, 5 ), 27 ); - EXPECT_EQ( m.getElement( 8, 6 ), 28 ); - EXPECT_EQ( m.getElement( 8, 7 ), 29 ); - EXPECT_EQ( m.getElement( 8, 8 ), 30 ); - EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - - 
EXPECT_EQ( m.getElement( 9, 0 ), 32 ); - EXPECT_EQ( m.getElement( 9, 1 ), 33 ); - EXPECT_EQ( m.getElement( 9, 2 ), 34 ); - EXPECT_EQ( m.getElement( 9, 3 ), 35 ); - EXPECT_EQ( m.getElement( 9, 4 ), 36 ); - EXPECT_EQ( m.getElement( 9, 5 ), 37 ); - EXPECT_EQ( m.getElement( 9, 6 ), 38 ); - EXPECT_EQ( m.getElement( 9, 7 ), 39 ); - EXPECT_EQ( m.getElement( 9, 8 ), 40 ); - EXPECT_EQ( m.getElement( 9, 9 ), 41 ); + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 
), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( 
m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); } template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m; - m.reset(); - - m.setDimensions( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setElement( 0, 4 ); - rowLengths.setElement( 1, 3 ); - rowLengths.setElement( 2, 8 ); - rowLengths.setElement( 3, 2 ); - for( IndexType i = 4; i < rows - 2; i++ ) - { - rowLengths.setElement( i, 1 ); - } - rowLengths.setElement( 8, 10 ); - rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); 
- - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); - - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); - - for( IndexType j = 8; j < rows; j++) - { - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); - } - - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 2 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 3 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 4 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 5 ); - EXPECT_EQ( m.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m.getElement( 1, 2 ), 7 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 8 ); - EXPECT_EQ( m.getElement( 2, 1 ), 9 ); - EXPECT_EQ( m.getElement( 2, 2 ), 10 ); - EXPECT_EQ( m.getElement( 2, 3 ), 11 ); - EXPECT_EQ( m.getElement( 2, 4 ), 12 ); - EXPECT_EQ( m.getElement( 2, 5 ), 13 ); - EXPECT_EQ( m.getElement( 2, 6 ), 14 ); - EXPECT_EQ( m.getElement( 2, 7 ), 15 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 18 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 
0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 19 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 20 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 21 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 22 ); - EXPECT_EQ( m.getElement( 8, 1 ), 23 ); - EXPECT_EQ( m.getElement( 8, 2 ), 24 ); - EXPECT_EQ( m.getElement( 8, 3 ), 25 ); - EXPECT_EQ( m.getElement( 8, 4 ), 26 ); - EXPECT_EQ( m.getElement( 8, 5 ), 27 ); - EXPECT_EQ( m.getElement( 8, 6 ), 28 ); - EXPECT_EQ( m.getElement( 8, 7 ), 29 ); - EXPECT_EQ( m.getElement( 8, 8 ), 30 ); - EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 32 ); - EXPECT_EQ( m.getElement( 9, 1 ), 33 ); - 
EXPECT_EQ( m.getElement( 9, 2 ), 34 ); - EXPECT_EQ( m.getElement( 9, 3 ), 35 ); - EXPECT_EQ( m.getElement( 9, 4 ), 36 ); - EXPECT_EQ( m.getElement( 9, 5 ), 37 ); - EXPECT_EQ( m.getElement( 9, 6 ), 38 ); - EXPECT_EQ( m.getElement( 9, 7 ), 39 ); - EXPECT_EQ( m.getElement( 9, 8 ), 40 ); - EXPECT_EQ( m.getElement( 9, 9 ), 41 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 
), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + 
EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); } template< typename Matrix > void test_AddElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 6x5 sparse matrix: - * - * / 1 2 3 0 0 \ - * | 0 4 5 6 0 | - * | 0 0 7 8 9 | - * | 10 0 0 0 0 | - * 
| 0 11 0 0 0 | - * \ 0 0 0 12 0 / - */ - - const IndexType rows = 6; - const IndexType cols = 5; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( rows ); - rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < cols - 2; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 1st row - m.setElement( 1, i, value++ ); - - for( IndexType i = 2; i < cols; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - m.setElement( 3, 0, value++ ); // 3rd row - - m.setElement( 4, 1, value++ ); // 4th row - - m.setElement( 5, 3, value++ ); // 5th row - - - // Check the set elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m.getElement( 1, 1 ), 4 ); - EXPECT_EQ( m.getElement( 1, 2 ), 5 ); - EXPECT_EQ( m.getElement( 1, 3 ), 6 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 7 ); - EXPECT_EQ( m.getElement( 2, 3 ), 8 ); - EXPECT_EQ( m.getElement( 2, 4 ), 9 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 10 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 11 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 12 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - - // Add 
new elements to the old elements with a multiplying factor applied to the old elements. - -/* - * Sets up the following 6x5 sparse matrix: - * - * / 1 2 3 0 0 \ - * | 0 4 5 6 0 | - * | 0 0 7 8 9 | - * | 10 0 0 0 0 | - * | 0 11 0 0 0 | - * \ 0 0 0 12 0 / - */ - -/* - * The following setup results in the following 6x5 sparse matrix: - * - * / 3 6 9 0 0 \ - * | 0 12 15 18 0 | - * | 0 0 21 24 27 | - * | 30 11 12 0 0 | - * | 0 35 14 15 0 | - * \ 0 0 16 41 18 / - */ - - RealType newValue = 1; - for( IndexType i = 0; i < cols - 2; i++ ) // 0th row - m.addElement( 0, i, newValue++, 2.0 ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 1st row - m.addElement( 1, i, newValue++, 2.0 ); - - for( IndexType i = 2; i < cols; i++ ) // 2nd row - m.addElement( 2, i, newValue++, 2.0 ); - - for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row - m.addElement( 3, i, newValue++, 2.0 ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 4th row - m.addElement( 4, i, newValue++, 2.0 ); - - for( IndexType i = 2; i < cols; i++ ) // 5th row - m.addElement( 5, i, newValue++, 2.0 ); - - - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m.getElement( 1, 1 ), 12 ); - EXPECT_EQ( m.getElement( 1, 2 ), 15 ); - EXPECT_EQ( m.getElement( 1, 3 ), 18 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 21 ); - EXPECT_EQ( m.getElement( 2, 3 ), 24 ); - EXPECT_EQ( m.getElement( 2, 4 ), 27 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 30 ); - EXPECT_EQ( m.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 35 ); - EXPECT_EQ( m.getElement( 
4, 2 ), 14 ); - EXPECT_EQ( m.getElement( 4, 3 ), 15 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 16 ); - EXPECT_EQ( m.getElement( 5, 3 ), 41 ); - EXPECT_EQ( m.getElement( 5, 4 ), 18 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( rows ); + rowLengths = 3; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 3, 0, value++ ); // 3rd row + + m.setElement( 4, 1, value++ ); // 4th row + + m.setElement( 5, 3, value++ ); // 5th row + + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( 
m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 0 0 \ + * | 0 12 15 18 0 | + * | 0 0 21 24 27 | + * | 30 11 12 0 0 | + * | 0 35 14 15 0 | + * \ 0 0 16 41 18 / + */ + + RealType newValue = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.addElement( 0, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.addElement( 1, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.addElement( 2, i, newValue++, 2.0 ); + + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row + m.addElement( 3, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row + m.addElement( 4, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 5th row + m.addElement( 5, i, newValue++, 2.0 ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 12 ); + EXPECT_EQ( m.getElement( 1, 2 ), 15 ); + EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 21 ); + EXPECT_EQ( m.getElement( 2, 3 ), 24 ); + 
EXPECT_EQ( m.getElement( 2, 4 ), 27 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 35 ); + EXPECT_EQ( m.getElement( 4, 2 ), 14 ); + EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 16 ); + EXPECT_EQ( m.getElement( 5, 3 ), 41 ); + EXPECT_EQ( m.getElement( 5, 4 ), 18 ); } template< typename Matrix > void test_VectorProduct() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 0 0 0 \ - * | 0 2 0 3 | - * | 0 4 0 0 | - * \ 0 0 5 0 / - */ + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ - const IndexType m_rows_1 = 4; - const IndexType m_cols_1 = 4; + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; - Matrix m_1; - m_1.reset(); - m_1.setDimensions( m_rows_1, m_cols_1 ); - typename Matrix::CompressedRowLengthsVector rowLengths_1; - rowLengths_1.setSize( m_rows_1 ); - rowLengths_1.setElement( 0, 1 ); - rowLengths_1.setElement( 1, 2 ); - rowLengths_1.setElement( 2, 1 ); - rowLengths_1.setElement( 3, 1 ); - m_1.setCompressedRowLengths( rowLengths_1 ); - - RealType value_1 = 1; - m_1.setElement( 0, 0, value_1++ ); // 0th row - - m_1.setElement( 1, 1, value_1++ 
); // 1st row - m_1.setElement( 1, 3, value_1++ ); - - m_1.setElement( 2, 1, value_1++ ); // 2nd row - - m_1.setElement( 3, 2, value_1++ ); // 3rd row - - VectorType inVector_1; - inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) - inVector_1.setElement( i, 2 ); + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); - VectorType outVector_1; - outVector_1.setSize( m_rows_1 ); - for( IndexType j = 0; j < outVector_1.getSize(); j++ ) - outVector_1.setElement( j, 0 ); - - - m_1.vectorProduct( inVector_1, outVector_1 ); + RealType value_1 = 1; + m_1.setElement( 0, 0, value_1++ ); // 0th row + m_1.setElement( 1, 1, value_1++ ); // 1st row + m_1.setElement( 1, 3, value_1++ ); - EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); - EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); - EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); - EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); - + m_1.setElement( 2, 1, value_1++ ); // 2nd row -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * \ 0 8 0 0 / - */ + m_1.setElement( 3, 2, value_1++ ); // 3rd row - const IndexType m_rows_2 = 4; - const IndexType m_cols_2 = 4; + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); - Matrix m_2; - m_2.reset(); - m_2.setDimensions( m_rows_2, m_cols_2 ); - typename Matrix::CompressedRowLengthsVector rowLengths_2; - rowLengths_2.setSize( m_rows_2 ); - rowLengths_2.setValue( 3 ); - rowLengths_2.setElement( 1, 1 ); - rowLengths_2.setElement( 3, 1 ); - m_2.setCompressedRowLengths( rowLengths_2 ); - - RealType value_2 = 1; - for( IndexType i 
= 0; i < 3; i++ ) // 0th row - m_2.setElement( 0, i, value_2++ ); - - m_2.setElement( 1, 3, value_2++ ); // 1st row - - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_2.setElement( 2, i, value_2++ ); - - for( IndexType i = 1; i < 2; i++ ) // 3rd row - m_2.setElement( 3, i, value_2++ ); - - VectorType inVector_2; - inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i < inVector_2.getSize(); i++ ) - inVector_2.setElement( i, 2 ); - - VectorType outVector_2; - outVector_2.setSize( m_rows_2 ); - for( IndexType j = 0; j < outVector_2.getSize(); j++ ) - outVector_2.setElement( j, 0 ); - - - m_2.vectorProduct( inVector_2, outVector_2 ); + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); - EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); - EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); - EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); - + m_1.vectorProduct( inVector_1, outVector_1 ); -/* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 4 5 6 | - * | 7 8 9 0 | - * \ 0 10 11 12 / - */ - const IndexType m_rows_3 = 4; - const IndexType m_cols_3 = 4; + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); - Matrix m_3; - m_3.reset(); - m_3.setDimensions( m_rows_3, m_cols_3 ); - typename Matrix::CompressedRowLengthsVector rowLengths_3; - rowLengths_3.setSize( m_rows_3 ); - rowLengths_3.setValue( 3 ); - m_3.setCompressedRowLengths( rowLengths_3 ); + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; + /*rowLengths_2 = 3; + 
rowLengths_2.setElement( 1, 1 ); + rowLengths_2.setElement( 3, 1 );*/ + m_2.setCompressedRowLengths( rowLengths_2 ); + + RealType value_2 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, value_2++ ); + + m_2.setElement( 1, 3, value_2++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, value_2++ ); - RealType value_3 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_3.setElement( 0, i, value_3++ ); + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, value_2++ ); - for( IndexType i = 1; i < 4; i++ ) - m_3.setElement( 1, i, value_3++ ); // 1st row + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_3.setElement( 2, i, value_3++ ); + VectorType outVector_2; + outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ - for( IndexType i = 1; i < 4; i++ ) // 3rd row - m_3.setElement( 3, i, value_3++ ); + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; - VectorType inVector_3; - inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) - inVector_3.setElement( i, 2 ); + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; + m_3.setCompressedRowLengths( rowLengths_3 ); - VectorType outVector_3; - outVector_3.setSize( m_rows_3 ); - for( IndexType j = 0; j < outVector_3.getSize(); j++ ) - outVector_3.setElement( 
j, 0 ); + RealType value_3 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, value_3++ ); + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, value_3++ ); // 1st row - m_3.vectorProduct( inVector_3, outVector_3 ); + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, value_3++ ); + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, value_3++ ); - EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); - EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); - EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); -/* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 0 4 0 0 \ - * | 0 5 6 7 8 0 0 0 | - * | 9 10 11 12 13 0 0 0 | - * | 0 14 15 16 17 0 0 0 | - * | 0 0 18 19 20 21 0 0 | - * | 0 0 0 22 23 24 25 0 | - * | 26 27 28 29 30 0 0 0 | - * \ 31 32 33 34 35 0 0 0 / - */ + m_3.vectorProduct( inVector_3, outVector_3 ); - const IndexType m_rows_4 = 8; - const IndexType m_cols_4 = 8; + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); - Matrix m_4; - m_4.reset(); - m_4.setDimensions( m_rows_4, m_cols_4 ); - typename Matrix::CompressedRowLengthsVector rowLengths_4; - rowLengths_4.setSize( m_rows_4 ); - rowLengths_4.setValue( 4 ); - rowLengths_4.setElement( 2, 5 ); - rowLengths_4.setElement( 6, 5 ); - rowLengths_4.setElement( 7, 5 ); - m_4.setCompressedRowLengths( rowLengths_4 ); + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | 
+ * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 34 35 0 0 0 / + */ - RealType value_4 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_4.setElement( 0, i, value_4++ ); + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; - m_4.setElement( 0, 5, value_4++ ); + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; + /*rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 );*/ + m_4.setCompressedRowLengths( rowLengths_4 ); - for( IndexType i = 1; i < 5; i++ ) // 1st row - m_4.setElement( 1, i, value_4++ ); + RealType value_4 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, value_4++ ); - for( IndexType i = 0; i < 5; i++ ) // 2nd row - m_4.setElement( 2, i, value_4++ ); + m_4.setElement( 0, 5, value_4++ ); - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_4.setElement( 3, i, value_4++ ); + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, value_4++ ); - for( IndexType i = 2; i < 6; i++ ) // 4th row - m_4.setElement( 4, i, value_4++ ); + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, value_4++ ); - for( IndexType i = 3; i < 7; i++ ) // 5th row - m_4.setElement( 5, i, value_4++ ); + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, value_4++ ); - for( IndexType i = 0; i < 5; i++ ) // 6th row - m_4.setElement( 6, i, value_4++ ); + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, value_4++ ); - for( IndexType i = 0; i < 5; i++ ) // 7th row - m_4.setElement( 7, i, value_4++ ); + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, value_4++ ); - VectorType inVector_4; - inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) - inVector_4.setElement( i, 2 ); + for( 
IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, value_4++ ); - VectorType outVector_4; - outVector_4.setSize( m_rows_4 ); - for( IndexType j = 0; j < outVector_4.getSize(); j++ ) - outVector_4.setElement( j, 0 ); + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, value_4++ ); + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); - m_4.vectorProduct( inVector_4, outVector_4 ); + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + m_4.vectorProduct( inVector_4, outVector_4 ); - EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); - EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); - EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); - EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); - EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); - EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); - EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); - EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); /* @@ -1048,76 +960,65 @@ void test_VectorProduct() * \ 29 30 31 32 33 34 35 36 / 8 */ - const IndexType m_rows_5 = 8; - const IndexType m_cols_5 = 8; - - Matrix m_5; - m_5.reset(); - m_5.setDimensions( m_rows_5, m_cols_5 ); - typename Matrix::CompressedRowLengthsVector rowLengths_5; - rowLengths_5.setSize( m_rows_5 ); - rowLengths_5.setElement(0, 6); - rowLengths_5.setElement(1, 3); - rowLengths_5.setElement(2, 4); - rowLengths_5.setElement(3, 5); - rowLengths_5.setElement(4, 2); - 
rowLengths_5.setElement(5, 7); - rowLengths_5.setElement(6, 8); - rowLengths_5.setElement(7, 8); - m_5.setCompressedRowLengths( rowLengths_5 ); - - RealType value_5 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_5.setElement( 0, i, value_5++ ); - - m_5.setElement( 0, 4, value_5++ ); // 0th row - m_5.setElement( 0, 5, value_5++ ); + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; - m_5.setElement( 1, 1, value_5++ ); // 1st row - m_5.setElement( 1, 3, value_5++ ); + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m_5.setCompressedRowLengths( rowLengths_5 ); - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_5.setElement( 2, i, value_5++ ); - - m_5.setElement( 2, 4, value_5++ ); // 2nd row + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, value_5++ ); - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_5.setElement( 3, i, value_5++ ); + m_5.setElement( 0, 4, value_5++ ); // 0th row + m_5.setElement( 0, 5, value_5++ ); - m_5.setElement( 4, 1, value_5++ ); // 4th row + m_5.setElement( 1, 1, value_5++ ); // 1st row + m_5.setElement( 1, 3, value_5++ ); - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_5.setElement( 5, i, value_5++ ); + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, value_5++ ); - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_5.setElement( 6, i, value_5++ ); + m_5.setElement( 2, 4, value_5++ ); // 2nd row - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_5.setElement( 7, i, value_5++ ); + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, value_5++ ); - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m_5.setElement( i, 7, 1); + m_5.setElement( 4, 1, value_5++ ); // 4th row - VectorType inVector_5; - inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) - inVector_5.setElement( i, 2 ); + for( IndexType i = 1; i < 7; 
i++ ) // 5th row + m_5.setElement( 5, i, value_5++ ); - VectorType outVector_5; - outVector_5.setSize( m_rows_5 ); - for( IndexType j = 0; j < outVector_5.getSize(); j++ ) - outVector_5.setElement( j, 0 ); + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, value_5++ ); - m_5.vectorProduct( inVector_5, outVector_5 ); + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, value_5++ ); - EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); - EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); - EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); - EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); - EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); - EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5; + inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); } template< typename Matrix > @@ -1145,48 +1046,39 @@ void test_RowsReduction() Matrix m; m.setDimensions( rows, cols ); - typename Matrix::RowsCapacitiesType rowsCapacities( rows ); - //rowLengths.setSize( rows ); - rowsCapacities.setElement(0, 6); - rowsCapacities.setElement(1, 3); - rowsCapacities.setElement(2, 4); - rowsCapacities.setElement(3, 
5); - rowsCapacities.setElement(4, 2); - rowsCapacities.setElement(5, 7); - rowsCapacities.setElement(6, 8); - rowsCapacities.setElement(7, 8); + typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; m.setCompressedRowLengths( rowsCapacities ); RealType value = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m.setElement( 0, i, value++ ); - m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 4, value++ ); // 0th row m.setElement( 0, 5, value++ ); - m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 1, value++ ); // 1st row m.setElement( 1, 3, value++ ); - for( IndexType i = 1; i < 3; i++ ) // 2nd row + for( IndexType i = 1; i < 3; i++ ) // 2nd row m.setElement( 2, i, value++ ); - m.setElement( 2, 4, value++ ); // 2nd row + m.setElement( 2, 4, value++ ); // 2nd row - for( IndexType i = 1; i < 5; i++ ) // 3rd row + for( IndexType i = 1; i < 5; i++ ) // 3rd row m.setElement( 3, i, value++ ); - m.setElement( 4, 1, value++ ); // 4th row + m.setElement( 4, 1, value++ ); // 4th row - for( IndexType i = 1; i < 7; i++ ) // 5th row + for( IndexType i = 1; i < 7; i++ ) // 5th row m.setElement( 5, i, value++ ); - for( IndexType i = 0; i < 7; i++ ) // 6th row + for( IndexType i = 0; i < 7; i++ ) // 6th row m.setElement( 6, i, value++ ); - for( IndexType i = 0; i < 8; i++ ) // 7th row + for( IndexType i = 0; i < 8; i++ ) // 7th row m.setElement( 7, i, value++ ); - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows m.setElement( i, 7, 1); //// @@ -1228,343 +1120,74 @@ void test_RowsReduction() template< typename Matrix > void test_PerformSORIteration() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 4x4 sparse matrix: - * - * / 4 1 0 0 \ - * | 1 4 1 0 | - * | 0 1 4 1 | - * \ 0 0 1 4 / - */ - - const IndexType m_rows = 4; - const IndexType 
m_cols = 4; - - Matrix m; - m.reset(); - m.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); - - m.setElement( 0, 0, 4.0 ); // 0th row - m.setElement( 0, 1, 1.0); - - m.setElement( 1, 0, 1.0 ); // 1st row - m.setElement( 1, 1, 4.0 ); - m.setElement( 1, 2, 1.0 ); - - m.setElement( 2, 1, 1.0 ); // 2nd row - m.setElement( 2, 2, 4.0 ); - m.setElement( 2, 3, 1.0 ); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - m.setElement( 3, 2, 1.0 ); // 3rd row - m.setElement( 3, 3, 4.0 ); + /* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ - RealType bVector [ 4 ] = { 1, 1, 1, 1 }; - RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + const IndexType m_rows = 4; + const IndexType m_cols = 4; - IndexType row = 0; - RealType omega = 1; + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + m.setCompressedRowLengths( rowLengths ); + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); - m.performSORIteration( bVector, row++, xVector, omega); + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 1.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); - m.performSORIteration( bVector, row++, xVector, omega); + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + 
IndexType row = 0; + RealType omega = 1; + m.performSORIteration( bVector, row++, xVector, omega); - m.performSORIteration( bVector, row++, xVector, omega); + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); + m.performSORIteration( bVector, row++, xVector, omega); + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); - m.performSORIteration( bVector, row++, xVector, omega); + m.performSORIteration( bVector, row++, xVector, omega); - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 0.25 ); -} + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); -// This test is only for AdEllpack -template< typename Matrix > -void test_OperatorEquals() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + m.performSORIteration( bVector, row++, xVector, omega); - if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) - return; - else - { - using AdELL_host = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Host, IndexType >; - using AdELL_cuda = TNL::Matrices::AdEllpack< RealType, TNL::Devices::Cuda, IndexType >; - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ - - const IndexType m_rows = 8; - const IndexType m_cols = 8; - - AdELL_host m_host; - - m_host.reset(); - 
m_host.setDimensions( m_rows, m_cols ); - typename AdELL_host::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setElement(0, 6); - rowLengths.setElement(1, 3); - rowLengths.setElement(2, 4); - rowLengths.setElement(3, 5); - rowLengths.setElement(4, 2); - rowLengths.setElement(5, 7); - rowLengths.setElement(6, 8); - rowLengths.setElement(7, 8); - m_host.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_host.setElement( 0, i, value++ ); - - m_host.setElement( 0, 4, value++ ); // 0th row - m_host.setElement( 0, 5, value++ ); - - m_host.setElement( 1, 1, value++ ); // 1st row - m_host.setElement( 1, 3, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_host.setElement( 2, i, value++ ); - - m_host.setElement( 2, 4, value++ ); // 2nd row - - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_host.setElement( 3, i, value++ ); - - m_host.setElement( 4, 1, value++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_host.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_host.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_host.setElement( 7, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 - m_host.setElement( i, 7, 1); - - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - 
EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 
1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - - AdELL_cuda m_cuda; - - // Copy the host matrix into the cuda matrix - m_cuda = m_host; - - // Reset the host matrix - m_host.reset(); - - // Copy the cuda matrix back into the host matrix - m_host = m_cuda; - - // Check the newly created double-copy host matrix - EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m_host.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 4 ), 4 ); - EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); - EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 3 ), 7 ); - EXPECT_EQ( m_host.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); - EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); - EXPECT_EQ( m_host.getElement( 2, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 4 ), 10 ); - EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m_host.getElement( 3, 3 ), 13 ); - EXPECT_EQ( m_host.getElement( 3, 4 ), 14 ); - EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 3, 7 ), 
1 ); - - EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); - EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); - EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); - EXPECT_EQ( m_host.getElement( 5, 3 ), 18 ); - EXPECT_EQ( m_host.getElement( 5, 4 ), 19 ); - EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); - EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); - EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); - EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); - EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); - EXPECT_EQ( m_host.getElement( 6, 3 ), 25 ); - EXPECT_EQ( m_host.getElement( 6, 4 ), 26 ); - EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); - EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); - EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - - EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); - EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); - EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); - EXPECT_EQ( m_host.getElement( 7, 3 ), 32 ); - EXPECT_EQ( m_host.getElement( 7, 4 ), 33 ); - EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); - EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); - EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - - // Try vectorProduct with copied cuda matrix to see if it works correctly. 
- using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; - - VectorType inVector; - inVector.setSize( m_cols ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) - inVector.setElement( i, 2 ); - - VectorType outVector; - outVector.setSize( m_rows ); - for( IndexType j = 0; j < outVector.getSize(); j++ ) - outVector.setElement( j, 0 ); - - m_cuda.vectorProduct( inVector, outVector ); - - EXPECT_EQ( outVector.getElement( 0 ), 32 ); - EXPECT_EQ( outVector.getElement( 1 ), 28 ); - EXPECT_EQ( outVector.getElement( 2 ), 56 ); - EXPECT_EQ( outVector.getElement( 3 ), 102 ); - EXPECT_EQ( outVector.getElement( 4 ), 32 ); - EXPECT_EQ( outVector.getElement( 5 ), 224 ); - EXPECT_EQ( outVector.getElement( 6 ), 352 ); - EXPECT_EQ( outVector.getElement( 7 ), 520 ); - } + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); } template< typename Matrix > @@ -1583,149 +1206,136 @@ void test_SaveAndLoad( const char* filename ) * \ 0 9 10 11 / */ - const IndexType m_rows = 4; - const IndexType m_cols = 4; - - Matrix savedMatrix; - savedMatrix.reset(); - savedMatrix.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - savedMatrix.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - savedMatrix.setElement( 0, i, value++ ); + const IndexType m_rows = 4; + const IndexType m_cols = 4; - savedMatrix.setElement( 1, 1, value++ ); - savedMatrix.setElement( 1, 3, value++ ); // 1st row + Matrix savedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + savedMatrix.setCompressedRowLengths( rowLengths ); - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - savedMatrix.setElement( 2, i, value++ ); + RealType value = 1; + for( IndexType i = 0; i < m_cols 
- 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, value++ ); - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - savedMatrix.setElement( 3, i, value++ ); + savedMatrix.setElement( 1, 1, value++ ); + savedMatrix.setElement( 1, 3, value++ ); // 1st row - ASSERT_NO_THROW( savedMatrix.save( filename ) ); + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, value++ ); - Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2; - rowLengths2.setSize( m_rows ); - rowLengths2.setValue( 3 ); - loadedMatrix.setCompressedRowLengths( rowLengths2 ); + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, value++ ); + ASSERT_NO_THROW( savedMatrix.save( filename ) ); - ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + Matrix loadedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); + rowLengths2 = 3; + loadedMatrix.setCompressedRowLengths( rowLengths2 ); + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - 
EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( 
savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); - EXPECT_EQ( std::remove( filename ), 0 ); + EXPECT_EQ( std::remove( filename ), 0 ); } template< typename Matrix > void test_Print() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - -/* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * | 0 8 9 10 | - * \ 0 0 11 12 / - */ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType m_rows = 5; - const IndexType m_cols = 4; + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 
4 | + * | 5 6 7 0 | + * | 0 8 9 10 | + * \ 0 0 11 12 / + */ - Matrix m; - m.reset(); - m.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); + const IndexType m_rows = 5; + const IndexType m_cols = 4; - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, value++ ); + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + m.setCompressedRowLengths( rowLengths ); - m.setElement( 1, 3, value++ ); // 1st row + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, value++ ); - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); + m.setElement( 1, 3, value++ ); // 1st row - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, value++ ); + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); - for( IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, value++ ); + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); - #include - std::stringstream printed; - std::stringstream couted; + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, value++ ); - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + std::stringstream printed; + std::stringstream couted; - m.print( std::cout ); //all the std::cout goes to ss + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); - std::cout.rdbuf(old_buf); //reset + m.print( std::cout ); //all the std::cout goes to ss - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" - "Row: 1 -> Col:3->4\t\n" - "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" - "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" - 
"Row: 4 -> Col:2->11 Col:3->12\t\n"; + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" + "Row: 1 -> Col:3->4\t\n" + "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" + "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" + "Row: 4 -> Col:2->11 Col:3->12\t\n"; - EXPECT_EQ( printed.str(), couted.str() ); + EXPECT_EQ( printed.str(), couted.str() ); } #endif -- GitLab From 52c9d170fd45dd27850c89d9d280cb1b5515c4bc Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 7 Feb 2020 15:29:18 +0100 Subject: [PATCH 134/179] Added Array constructor with size and value. --- src/TNL/Containers/Array.h | 9 +++++++++ src/TNL/Containers/Array.hpp | 12 ++++++++++++ src/UnitTests/Containers/ArrayTest.h | 5 +++++ 3 files changed, 26 insertions(+) diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index a9705e66f..bf69f4888 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -137,6 +137,15 @@ class Array */ explicit Array( const IndexType& size, const AllocatorType& allocator = AllocatorType() ); + /** + * \brief Constructs an array with given size and value. + * + * \param size The number of array elements to be allocated. + * \param value The value all elements will be set to. + * \param allocator The allocator to be associated with this array. + */ + explicit Array( const IndexType& size, const Value& value, const AllocatorType& allocator = AllocatorType() ); + /** * \brief Constructs an array with given size and copies data from given * pointer. 
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 2a60986f5..4dd8d5a2f 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -62,6 +62,18 @@ Array( const IndexType& size, const AllocatorType& allocator ) this->setSize( size ); } +template< typename Value, + typename Device, + typename Index, + typename Allocator > +Array< Value, Device, Index, Allocator >:: +Array( const IndexType& size, const Value& value, const AllocatorType& allocator ) +: allocator( allocator ) +{ + this->setSize( size ); + ( *this ) = value; +} + template< typename Value, typename Device, typename Index, diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index ef3119365..255a67fb9 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -135,6 +135,11 @@ TYPED_TEST( ArrayTest, constructors ) v = 0; EXPECT_EQ( v.getSize(), 10 ); + ArrayType vv( 10, 4 ); + EXPECT_EQ( vv.getSize(), 10 ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( vv.getElement( i ), 4 ); + // deep copy ArrayType w( v ); EXPECT_NE( w.getData(), v.getData() ); -- GitLab From 5d79ecbc2e7470ae246cd568b3b389a37514111d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Fri, 7 Feb 2020 23:02:07 +0100 Subject: [PATCH 135/179] Added SparseMatrix constructors with initializer lists. 
--- src/TNL/Matrices/SparseMatrix.h | 11 +++ src/TNL/Matrices/SparseMatrix.hpp | 42 ++++++++ src/UnitTests/Matrices/SparseMatrixTest.h | 115 ++++++++++++++++------ 3 files changed, 136 insertions(+), 32 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 7072ce3c4..15f585716 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -74,6 +74,17 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities, + const IndexType columns, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + SparseMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + ViewType getView() const; // TODO: remove const ConstViewType getConstView() const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 992443434..938d883af 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -73,6 +73,48 @@ SparseMatrix( const IndexType rows, { } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities, + const IndexType columns, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: 
BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator ) +{ + this->setCompressedRowLengths( RowCapacitiesType ( rowCapacities ) ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) +{ + Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 ); + for( const auto& i : data ) + rowCapacities[ std::get< 0 >( i ) ]++; + SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); + hostMatrix.setCompressedRowLength( rowCapacities ); + for( const auto& i : data ) + hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); + ( *this ) = hostMatrix; +} + template< typename Real, typename Device, typename Index, diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 04a9b065f..26b15fafd 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -36,6 +36,79 @@ void cuda_test_GetType() std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } +template< typename Matrix > +void test_Constructors() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + Matrix m1( 5, 6 ); + EXPECT_EQ( m1.getRows(), 5 ); + EXPECT_EQ( m1.getColumns(), 6 ); + + Matrix m2( {1, 2, 2, 2, 1 }, 5 ); + typename 
Matrix::RowCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; + m2.getCompressedRowLength( v1 ); + EXPECT_EQ( v1, v2 ); + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + + Matrix m3( 6, 5, { + { 0, 0, 1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 }, + { 1, 1, 4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 }, + { 2, 2, 7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 }, + { 3, 0, 10.0 }, + { 4, 1, 11.0 }, + { 5, 3, 12.0 } } ); + + // Check the set elements + EXPECT_EQ( m3.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m3.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m3.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m3.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m3.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m3.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m3.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m3.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m3.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m3.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m3.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m3.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m3.getElement( 5, 4 ), 0 ); +} + template< typename Matrix > void test_SetDimensions() { @@ -64,9 +137,7 @@ void test_SetCompressedRowLengths() const IndexType cols = 11; Matrix m( rows, cols ); - typename Matrix::CompressedRowLengthsVector 
rowLengths; - rowLengths.setSize( rows ); - rowLengths = 3; + typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); IndexType rowLength = 1; for( IndexType i = 2; i < rows; i++ ) @@ -592,8 +663,7 @@ void test_AddElement() const IndexType cols = 5; Matrix m( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( rows ); - rowLengths = 3; + typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); m.setCompressedRowLengths( rowLengths ); RealType value = 1; @@ -742,12 +812,7 @@ void test_VectorProduct() Matrix m_1; m_1.reset(); m_1.setDimensions( m_rows_1, m_cols_1 ); - typename Matrix::CompressedRowLengthsVector rowLengths_1; - rowLengths_1.setSize( m_rows_1 ); - rowLengths_1.setElement( 0, 1 ); - rowLengths_1.setElement( 1, 2 ); - rowLengths_1.setElement( 2, 1 ); - rowLengths_1.setElement( 3, 1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 }; m_1.setCompressedRowLengths( rowLengths_1 ); RealType value_1 = 1; @@ -770,10 +835,8 @@ void test_VectorProduct() for( IndexType j = 0; j < outVector_1.getSize(); j++ ) outVector_1.setElement( j, 0 ); - m_1.vectorProduct( inVector_1, outVector_1 ); - EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); @@ -793,21 +856,18 @@ void test_VectorProduct() Matrix m_2( m_rows_2, m_cols_2 ); typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; - /*rowLengths_2 = 3; - rowLengths_2.setElement( 1, 1 ); - rowLengths_2.setElement( 3, 1 );*/ m_2.setCompressedRowLengths( rowLengths_2 ); RealType value_2 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row + for( IndexType i = 0; i < 3; i++ ) // 0th row m_2.setElement( 0, i, value_2++ ); m_2.setElement( 1, 3, value_2++ ); // 1st row - for( IndexType i = 0; i < 3; i++ ) // 2nd row + for( IndexType i = 0; i < 3; i++ ) // 2nd row m_2.setElement( 2, i, value_2++ ); - for( IndexType i = 1; i < 2; i++ ) // 3rd row + for( IndexType i 
= 1; i < 2; i++ ) // 3rd row m_2.setElement( 3, i, value_2++ ); VectorType inVector_2; @@ -891,11 +951,6 @@ void test_VectorProduct() Matrix m_4( m_rows_4, m_cols_4 ); typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; - /*rowLengths_4.setSize( m_rows_4 ); - rowLengths_4.setValue( 4 ); - rowLengths_4.setElement( 2, 5 ); - rowLengths_4.setElement( 6, 5 ); - rowLengths_4.setElement( 7, 5 );*/ m_4.setCompressedRowLengths( rowLengths_4 ); RealType value_4 = 1; @@ -1137,8 +1192,7 @@ void test_PerformSORIteration() const IndexType m_cols = 4; Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); m.setCompressedRowLengths( rowLengths ); m.setElement( 0, 0, 4.0 ); // 0th row @@ -1210,8 +1264,7 @@ void test_SaveAndLoad( const char* filename ) const IndexType m_cols = 4; Matrix savedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); savedMatrix.setCompressedRowLengths( rowLengths ); RealType value = 1; @@ -1230,8 +1283,7 @@ void test_SaveAndLoad( const char* filename ) ASSERT_NO_THROW( savedMatrix.save( filename ) ); Matrix loadedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); - rowLengths2 = 3; + typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows, 3 ); loadedMatrix.setCompressedRowLengths( rowLengths2 ); ASSERT_NO_THROW( loadedMatrix.load( filename ) ); @@ -1300,8 +1352,7 @@ void test_Print() const IndexType m_cols = 4; Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); m.setCompressedRowLengths( rowLengths ); RealType value = 1; -- GitLab From c869e97679ebe72877d1ce94c19a1a5ccb9aae13 Mon Sep 
17 00:00:00 2001 From: Tomas Oberhuber Date: Sat, 8 Feb 2020 11:17:15 +0100 Subject: [PATCH 136/179] Fixing sparse matrix constructors with initializer list together with unit tests. --- src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 6 ++--- src/UnitTests/Matrices/SparseMatrixTest.h | 25 +++++++++++++++---- src/UnitTests/Matrices/SparseMatrixTest_CSR.h | 7 ++++++ .../Matrices/SparseMatrixTest_Ellpack.h | 7 ++++++ .../Matrices/SparseMatrixTest_SlicedEllpack.h | 7 ++++++ 6 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 15f585716..573d382ce 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -74,7 +74,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); - SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities, + SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, const IndexType columns, const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 938d883af..df841230c 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -81,13 +81,13 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -SparseMatrix( const std::initializer_list< std::tuple< IndexType > >& rowCapacities, +SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, const IndexType columns, const RealAllocatorType& realAllocator, const IndexAllocatorType& indexAllocator ) : BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( 
indexAllocator ) { - this->setCompressedRowLengths( RowCapacitiesType ( rowCapacities ) ); + this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) ); } template< typename Real, @@ -109,7 +109,7 @@ SparseMatrix( const IndexType rows, for( const auto& i : data ) rowCapacities[ std::get< 0 >( i ) ]++; SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); - hostMatrix.setCompressedRowLength( rowCapacities ); + hostMatrix.setCompressedRowLengths( rowCapacities ); for( const auto& i : data ) hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); ( *this ) = hostMatrix; diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 26b15fafd..72eb0b33b 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -48,8 +48,17 @@ void test_Constructors() EXPECT_EQ( m1.getColumns(), 6 ); Matrix m2( {1, 2, 2, 2, 1 }, 5 ); - typename Matrix::RowCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; - m2.getCompressedRowLength( v1 ); + typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; + m2.setElement( 0, 0, 1 ); // 0th row + m2.setElement( 1, 0, 1 ); // 1st row + m2.setElement( 1, 1, 1 ); + m2.setElement( 2, 1, 1 ); // 2nd row + m2.setElement( 2, 2, 1 ); + m2.setElement( 3, 2, 1 ); // 3rd row + m2.setElement( 3, 3, 1 ); + m2.setElement( 4, 4, 1 ); // 4th row + m2.getCompressedRowLengths( v1 ); + EXPECT_EQ( v1, v2 ); /* @@ -662,8 +671,14 @@ void test_AddElement() const IndexType rows = 6; const IndexType cols = 5; - Matrix m( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); + Matrix m( rows, cols, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, + { 1, 1, 4 }, { 1, 2, 5 }, { 1, 3, 6 }, + { 2, 2, 7 }, { 2, 3, 8 }, { 2, 4, 9 }, + { 3, 0, 10 }, { 3, 1, 0 }, { 3, 2, 0 }, + { 4, 1, 11 }, { 4, 2, 0 }, { 4, 3, 0 }, + { 5, 2, 0 }, { 5, 3, 12 }, { 5, 4, 0 } } ); + /*typename 
Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); m.setCompressedRowLengths( rowLengths ); RealType value = 1; @@ -680,7 +695,7 @@ void test_AddElement() m.setElement( 4, 1, value++ ); // 4th row - m.setElement( 5, 3, value++ ); // 5th row + m.setElement( 5, 3, value++ ); // 5th row*/ // Check the set elements diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index 781735e7f..f029c3bc7 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -59,6 +59,13 @@ using CSRMatrixTypes = ::testing::Types TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); +TYPED_TEST( CSRMatrixTest, Constructors ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Constructors< CSRMatrixType >(); +} + TYPED_TEST( CSRMatrixTest, setDimensionsTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 9650105f6..2bf5fe20d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -70,6 +70,13 @@ using EllpackMatrixTypes = ::testing::Types TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes); +TYPED_TEST( EllpackMatrixTest, Constructors ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Constructors< EllpackMatrixType >(); +} + TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 11365cc5b..190839fd5 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -71,6 +71,13 @@ using SlicedEllpackMatrixTypes = ::testing::Types TYPED_TEST_SUITE( 
SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); +TYPED_TEST( SlicedEllpackMatrixTest, Constructors ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Constructors< SlicedEllpackMatrixType >(); +} + TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; -- GitLab From 57c3a68104de9a748293cbaac0ae975f8bf19a58 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Sat, 8 Feb 2020 14:23:27 +0100 Subject: [PATCH 137/179] Implementing symmetric sparse matrix unit tests. --- .../Matrices/SymmetricSparseMatrixTest.h | 1304 +++++++++++++++++ 1 file changed, 1304 insertions(+) create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest.h diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h new file mode 100644 index 000000000..fd6bd8464 --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -0,0 +1,1304 @@ +/*************************************************************************** + SymmetricSparseMatrixTest.h - description + ------------------- + begin : Feb 7, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_GTEST +#include + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + / 1 2 4 7 \ + | 2 3 5 8 10 13 16 19 | + | 4 5 6 11 14 21 24 27 | + | 7 8 9 17 20 | + | 10 11 12 22 25 | + | 13 14 15 28 | + | 16 17 18 | + | 19 20 21 | + | 21 22 23 | + | 24 25 26 | + \ 27 28 30 / + */ + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. 
+ RealType value = 1; + + // 0th row - lower part + m.setElement( 0, 0, value++ ); + + // 1st row - lower part + m.setElement( 1, 0, value++ ); + m.setElement( 1, 1, value++ ); + + // 2nd row - lower part + m.setElement( 2, 0, value++ ); + m.setElement( 2, 1, value++ ); + m.setElement( 2, 2, value++ ); + + // 3rd row - lower part + m.setElement( 3, 0, value++ ); + m.setElement( 3, 1, value++ ); + m.setElement( 3, 3, value++ ); + + // 4th row - lower part + m.setElement( 4, 1, value++ ); + m.setElement( 4, 2, value++ ); + m.setElement( 4, 4, value++ ); + + // 5th row - lower part + m.setElement( 5, 1, value++ ); + m.setElement( 5, 2, value++ ); + m.setElement( 5, 5, value++ ); + + // 6th row - lower part + m.setElement( 6, 1, value++ ); + m.setElement( 6, 3, value++ ); + m.setElement( 6, 6, value++ ); + + // 7th row - lower part + m.setElement( 7, 1, value++ ); + m.setElement( 7, 3, value++ ); + m.setElement( 7, 7, value++ ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 11x11 sparse matrix: + * + / 1 2 4 7 \ -> 4 + | 2 3 5 8 10 13 16 19 | -> 8 + | 4 5 6 11 14 22 25 28 | -> 8 + | 7 8 9 17 20 | -> 5
+ | 10 11 12 23 26 | -> 5 + | 13 14 15 29 | -> 4 + | 16 17 18 | -> 3 + | 19 20 21 | -> 3 + | 22 23 24 | -> 3 + | 25 26 27 | -> 3 + \ 28 29 30 / -> 3 + ---- + 49 + */ + + const IndexType rows = 11; + const IndexType cols = 11; + + Matrix m( rows, cols, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 0, 4 }, { 2, 1, 5 }, { 2, 2, 6 }, + { 3, 0, 7 }, { 3, 1, 8 }, { 3, 3, 9 }, + { 4, 1, 10 }, { 4, 2, 11 }, { 4, 4, 12 }, + { 5, 1, 13 }, { 5, 2, 14 }, { 5, 5, 15 }, + { 6, 1, 16 }, { 6, 3, 17 }, { 6, 6, 18 }, + { 7, 1, 19 }, { 7, 3, 20 }, { 7, 7, 21 }, + { 8, 2, 22 }, { 8, 4, 23 }, { 8, 8, 24 }, + { 9, 2, 25 }, { 9, 4, 26 }, { 9, 9, 27 }, + { 10, 2, 28 }, { 10, 5, 29 }, { 10, 10, 30 } + } ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 11x11 sparse matrix: + * + / 1 2 4 7 \ + | 2 3 5 8 10 13 16 19 | + | 4 5 6 11 14 22 25 28 | + | 7 8 9 17 20 | + | 10 11 12 23 26 | + | 13 14 15 29 | + | 16 17 18 | + | 19 20 21 | + | 22 23 24 | + | 25 26 27 | + \ 28 29 30 / + */ + + Matrix m( { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 11 ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: row.setElement( 0, 0,
1 ); break; + case 1: row.setElement( 0, 0, 2 ); row.setElement( 1, 1, 3 ); break; + case 2: row.setElement( 0, 0, 4 ); row.setElement( 1, 1, 5 ); row.setElement( 2, 2, 6 ); break; + case 3: row.setElement( 0, 0, 7 ); row.setElement( 1, 1, 8 ); row.setElement( 2, 3, 9 ); break; + case 4: row.setElement( 0, 1, 10 ); row.setElement( 1, 2, 11 ); row.setElement( 2, 4, 12 ); break; + case 5: row.setElement( 0, 1, 13 ); row.setElement( 1, 2, 14 ); row.setElement( 2, 5, 15 ); break; + case 6: row.setElement( 0, 1, 16 ); row.setElement( 1, 3, 17 ); row.setElement( 2, 6, 18 ); break; + case 7: row.setElement( 0, 1, 19 ); row.setElement( 1, 3, 20 ); row.setElement( 2, 7, 21 ); break; + case 8: row.setElement( 0, 2, 22 ); row.setElement( 1, 4, 23 ); row.setElement( 2, 8, 24 ); break; + case 9: row.setElement( 0, 2, 25 ); row.setElement( 1, 4, 26 ); row.setElement( 2, 9, 27 ); break; + case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 4 ); + EXPECT_EQ( m.getElement( 0, 3 ), 7 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + EXPECT_EQ( m.getElement( 0, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 8 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 16 ); + EXPECT_EQ( m.getElement( 1, 7 ), 19 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + EXPECT_EQ( m.getElement( 1, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 2,
0 ), 4 ); + EXPECT_EQ( m.getElement( 2, 1 ), 5 ); + EXPECT_EQ( m.getElement( 2, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 11 ); + EXPECT_EQ( m.getElement( 2, 5 ), 14 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m.getElement( 2, 7 ), 0 ); + EXPECT_EQ( m.getElement( 2, 8 ), 22 ); + EXPECT_EQ( m.getElement( 2, 9 ), 25 ); + EXPECT_EQ( m.getElement( 2, 10 ), 28 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 7 ); + EXPECT_EQ( m.getElement( 3, 1 ), 8 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 17 ); + EXPECT_EQ( m.getElement( 3, 7 ), 20 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + EXPECT_EQ( m.getElement( 3, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 10 ); + EXPECT_EQ( m.getElement( 4, 2 ), 11 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 12 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 23 ); + EXPECT_EQ( m.getElement( 4, 9 ), 26 ); + EXPECT_EQ( m.getElement( 4, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 13 ); + EXPECT_EQ( m.getElement( 5, 2 ), 14 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 15 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + EXPECT_EQ( m.getElement( 5, 10 ), 29 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 16 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 17 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); 
+ EXPECT_EQ( m.getElement( 6, 6 ), 18 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + EXPECT_EQ( m.getElement( 6, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 0 ); + EXPECT_EQ( m.getElement( 7, 1 ), 19 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 20 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 21 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + EXPECT_EQ( m.getElement( 7, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 0 ); + EXPECT_EQ( m.getElement( 8, 1 ), 0 ); + EXPECT_EQ( m.getElement( 8, 2 ), 22 ); + EXPECT_EQ( m.getElement( 8, 3 ), 0 ); + EXPECT_EQ( m.getElement( 8, 4 ), 23 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + EXPECT_EQ( m.getElement( 8, 6 ), 0 ); + EXPECT_EQ( m.getElement( 8, 7 ), 0 ); + EXPECT_EQ( m.getElement( 8, 8 ), 24 ); + EXPECT_EQ( m.getElement( 8, 9 ), 0 ); + EXPECT_EQ( m.getElement( 8, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 25 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 26 ); + EXPECT_EQ( m.getElement( 9, 5 ), 0 ); + EXPECT_EQ( m.getElement( 9, 6 ), 0 ); + EXPECT_EQ( m.getElement( 9, 7 ), 0 ); + EXPECT_EQ( m.getElement( 9, 8 ), 0 ); + EXPECT_EQ( m.getElement( 9, 9 ), 27 ); + EXPECT_EQ( m.getElement( 9, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 10, 0 ), 0 ); + EXPECT_EQ( m.getElement( 10, 1 ), 0 ); + EXPECT_EQ( m.getElement( 10, 2 ), 28 ); + EXPECT_EQ( m.getElement( 10, 3 ), 0 ); + EXPECT_EQ( m.getElement( 10, 4 ), 0 ); + EXPECT_EQ( m.getElement( 10, 5 ), 29 ); + EXPECT_EQ( m.getElement( 10, 6 ), 0 ); + EXPECT_EQ( m.getElement( 10, 7 ), 0 ); + EXPECT_EQ( m.getElement( 10, 8 ), 0 ); + EXPECT_EQ( m.getElement( 10, 9 ), 0 ); + EXPECT_EQ( m.getElement( 10, 10 ), 30 ); +} 
+ + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 0 4 0 0 10 0 0 0 \ + * | 0 2 0 5 0 0 11 0 0 0 | + * | 0 0 3 6 0 0 12 0 0 0 | + * | 4 5 6 7 0 0 13 0 0 0 | + * | 0 0 0 0 8 0 14 0 0 0 | + * | 0 0 0 0 0 9 15 0 0 0 | + * | 10 11 12 13 14 15 16 0 0 0 | + * | 0 0 0 0 0 0 0 17 0 0 | + * | 0 0 0 0 0 0 0 0 18 0 | + * \ 0 0 0 0 0 0 0 0 0 19 / + */ + + Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( i, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 6; i++ ) + m.setElement( i, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) + m.setElement( 6, i, value++ ); + + for( IndexType i = 7; i < 10; i++ ) + m.setElement( i, i, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 10 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 2 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 11 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 3 ); + EXPECT_EQ( m.getElement( 2, 3 ), 6 ); + EXPECT_EQ( m.getElement( 2, 4 
), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 12 ); + EXPECT_EQ( m.getElement( 2, 7 ), 0 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 4 ); + EXPECT_EQ( m.getElement( 3, 1 ), 5 ); + EXPECT_EQ( m.getElement( 3, 2 ), 6 ); + EXPECT_EQ( m.getElement( 3, 3 ), 7 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 13 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 8 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 14 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 9 ); + EXPECT_EQ( m.getElement( 5, 6 ), 15 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 10 ); + EXPECT_EQ( m.getElement( 6, 1 ), 11 ); + EXPECT_EQ( m.getElement( 6, 2 ), 12 ); + EXPECT_EQ( m.getElement( 6, 3 ), 13 ); + EXPECT_EQ( m.getElement( 6, 4 ), 14 ); + EXPECT_EQ( m.getElement( 6, 5 ), 15 ); + EXPECT_EQ( m.getElement( 6, 6 ), 16 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 0 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + 
EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 17 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 0 ); + EXPECT_EQ( m.getElement( 8, 1 ), 0 ); + EXPECT_EQ( m.getElement( 8, 2 ), 0 ); + EXPECT_EQ( m.getElement( 8, 3 ), 0 ); + EXPECT_EQ( m.getElement( 8, 4 ), 0 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + EXPECT_EQ( m.getElement( 8, 6 ), 0 ); + EXPECT_EQ( m.getElement( 8, 7 ), 0 ); + EXPECT_EQ( m.getElement( 8, 8 ), 18 ); + EXPECT_EQ( m.getElement( 8, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 0 ); + EXPECT_EQ( m.getElement( 9, 6 ), 0 ); + EXPECT_EQ( m.getElement( 9, 7 ), 0 ); + EXPECT_EQ( m.getElement( 9, 8 ), 0 ); + EXPECT_EQ( m.getElement( 9, 9 ), 19 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 0 0 0 \ + * | 2 3 4 0 0 | + * | 0 4 5 6 0 | + * | 0 0 6 7 8 | + * | 0 0 0 8 9 | + * \ 0 0 0 0 10 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( 6, 5, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 1, 4 }, { 2, 2, 5 }, + { 3, 2, 6 }, { 3, 3, 7 }, + { 4, 3, 8 }, { 4, 4, 9 }, + { 5, 5, 10 } } ); + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); 
+ EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 4 ); + EXPECT_EQ( m.getElement( 2, 2 ), 5 ); + EXPECT_EQ( m.getElement( 2, 3 ), 6 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 6 ); + EXPECT_EQ( m.getElement( 3, 3 ), 7 ); + EXPECT_EQ( m.getElement( 3, 4 ), 8 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 8 ); + EXPECT_EQ( m.getElement( 4, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 10 ); +############################################################################ + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 0 0 \ + * | 0 12 15 18 0 | + * | 0 0 21 24 27 | + * | 30 11 12 0 0 | + * | 0 35 14 15 0 | + * \ 0 0 16 41 18 / + */ + + RealType newValue = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.addElement( 0, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.addElement( 1, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.addElement( 2, i, newValue++, 2.0 ); + + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row + m.addElement( 3, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row + m.addElement( 4, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 5th row + m.addElement( 5, i, newValue++, 2.0 ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 12 ); + EXPECT_EQ( m.getElement( 1, 2 ), 15 ); + EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 21 ); + EXPECT_EQ( m.getElement( 2, 3 ), 24 ); + EXPECT_EQ( m.getElement( 2, 4 ), 27 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 35 ); + EXPECT_EQ( m.getElement( 4, 2 ), 14 ); + EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 16 ); + EXPECT_EQ( m.getElement( 5, 3 ), 41 ); + 
EXPECT_EQ( m.getElement( 5, 4 ), 18 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); + + RealType value_1 = 1; + m_1.setElement( 0, 0, value_1++ ); // 0th row + + m_1.setElement( 1, 1, value_1++ ); // 1st row + m_1.setElement( 1, 3, value_1++ ); + + m_1.setElement( 2, 1, value_1++ ); // 2nd row + + m_1.setElement( 3, 2, value_1++ ); // 3rd row + + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); + + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); + + + m_1.vectorProduct( inVector_1, outVector_1 ); + + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; + 
/*rowLengths_2 = 3; + rowLengths_2.setElement( 1, 1 ); + rowLengths_2.setElement( 3, 1 );*/ + m_2.setCompressedRowLengths( rowLengths_2 ); + + RealType value_2 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, value_2++ ); + + m_2.setElement( 1, 3, value_2++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, value_2++ ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, value_2++ ); + + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); + + VectorType outVector_2; + outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; + m_3.setCompressedRowLengths( rowLengths_3 ); + + RealType value_3 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, value_3++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, value_3++ ); + + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < 
outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); + + m_3.vectorProduct( inVector_3, outVector_3 ); + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | + * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 34 35 0 0 0 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; + /*rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 );*/ + m_4.setCompressedRowLengths( rowLengths_4 ); + + RealType value_4 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, value_4++ ); + + m_4.setElement( 0, 5, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, value_4++ ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, value_4++ ); + + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, value_4++ ); + + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); + + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < 
outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + + m_4.vectorProduct( inVector_4, outVector_4 ); + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, value_5++ ); + + m_5.setElement( 0, 4, value_5++ ); // 0th row + m_5.setElement( 0, 5, value_5++ ); + + m_5.setElement( 1, 1, value_5++ ); // 1st row + m_5.setElement( 1, 3, value_5++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, value_5++ ); + + m_5.setElement( 2, 4, value_5++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, value_5++ ); + + m_5.setElement( 4, 1, value_5++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, value_5++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5; + 
inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m.setCompressedRowLengths( rowsCapacities ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 5, value++ ); + + m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 2, 4, value++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + m.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i 
< 7; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the 
following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + m.setCompressedRowLengths( rowLengths ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 0 5 | + * | 6 7 8 0 | + * \ 0 9 10 11 / + */ + + const IndexType m_rows = 4; + const IndexType 
m_cols = 4; + + Matrix savedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + savedMatrix.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, value++ ); + + savedMatrix.setElement( 1, 1, value++ ); + savedMatrix.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, value++ ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); + rowLengths2 = 3; + loadedMatrix.setCompressedRowLengths( rowLengths2 ); + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 
), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * | 0 8 9 10 | + * \ 0 0 11 12 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); + rowLengths = 3; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 2; i < m_cols; 
i++ ) // 4th row + m.setElement( 4, i, value++ ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" + "Row: 1 -> Col:3->4\t\n" + "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" + "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" + "Row: 4 -> Col:2->11 Col:3->12\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif -- GitLab From 49f09e5ff82a44bde3644a11cc4de41a83f95151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 9 Feb 2020 19:01:14 +0100 Subject: [PATCH 138/179] Implementing symmetric sparse matrix unit tests. --- .../Matrices/SymmetricSparseMatrixTest.h | 325 ++++++------------ 1 file changed, 106 insertions(+), 219 deletions(-) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index fd6bd8464..1beddaf7e 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -612,74 +612,58 @@ void test_AddElement() EXPECT_EQ( m.getElement( 5, 2 ), 0 ); EXPECT_EQ( m.getElement( 5, 3 ), 0 ); EXPECT_EQ( m.getElement( 5, 4 ), 10 ); -############################################################################ + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
/* * The following setup results in the following 6x5 sparse matrix: * - * / 3 6 9 0 0 \ - * | 0 12 15 18 0 | - * | 0 0 21 24 27 | - * | 30 11 12 0 0 | - * | 0 35 14 15 0 | - * \ 0 0 16 41 18 / + * / 1 2 0 0 0 \ / 0 1 0 0 0 \ / 2 5 0 0 0 \ + * | 2 3 4 0 0 | | 1 0 1 0 0 | | 5 6 9 0 0 | + * 2 | 0 4 5 6 0 | + | 0 1 0 1 0 | = | 0 9 10 13 0 | + * | 0 0 6 7 8 | | 0 0 1 0 1 | | 0 0 13 14 17 | + * | 0 0 0 8 9 | | 0 0 0 1 0 | | 0 0 0 17 18 | + * \ 0 0 0 0 10 / \ 0 0 0 0 1 / \ 0 0 0 0 21 / */ - RealType newValue = 1; - for( IndexType i = 0; i < cols - 2; i++ ) // 0th row - m.addElement( 0, i, newValue++, 2.0 ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 1st row - m.addElement( 1, i, newValue++, 2.0 ); - - for( IndexType i = 2; i < cols; i++ ) // 2nd row - m.addElement( 2, i, newValue++, 2.0 ); - - for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row - m.addElement( 3, i, newValue++, 2.0 ); + for( IndexType i = 1; i < rows; i++ ) + m.addElement( i, i - 1, 1.0, 2.0 ); - for( IndexType i = 1; i < cols - 1; i++ ) // 4th row - m.addElement( 4, i, newValue++, 2.0 ); - for( IndexType i = 2; i < cols; i++ ) // 5th row - m.addElement( 5, i, newValue++, 2.0 ); - - - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 0 ), 2 ); + EXPECT_EQ( m.getElement( 0, 1 ), 5 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m.getElement( 1, 1 ), 12 ); - EXPECT_EQ( m.getElement( 1, 2 ), 15 ); - EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 9 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 21 ); - EXPECT_EQ( m.getElement( 2, 3 ), 
24 ); - EXPECT_EQ( m.getElement( 2, 4 ), 27 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 30 ); - EXPECT_EQ( m.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m.getElement( 3, 2 ), 12 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 13 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 13 ); + EXPECT_EQ( m.getElement( 3, 3 ), 14 ); + EXPECT_EQ( m.getElement( 3, 4 ), 17 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 35 ); - EXPECT_EQ( m.getElement( 4, 2 ), 14 ); - EXPECT_EQ( m.getElement( 4, 3 ), 15 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 16 ); - EXPECT_EQ( m.getElement( 5, 3 ), 41 ); - EXPECT_EQ( m.getElement( 5, 4 ), 18 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 21 ); } template< typename Matrix > @@ -690,226 +674,129 @@ void test_VectorProduct() using IndexType = typename Matrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - /* + /** * Sets up the following 4x4 sparse matrix: * * / 1 0 0 0 \ - * | 0 2 0 3 | - * | 0 4 0 0 | - * \ 0 0 5 0 / + * | 0 2 3 4 | + * | 0 3 0 5 | + * \ 0 4 5 0 / */ const IndexType m_rows_1 = 4; const IndexType m_cols_1 = 4; - Matrix m_1; - m_1.reset(); - m_1.setDimensions( m_rows_1, m_cols_1 ); - typename Matrix::CompressedRowLengthsVector rowLengths_1; - rowLengths_1.setSize( m_rows_1 ); - rowLengths_1.setElement( 0, 1 ); - rowLengths_1.setElement( 1, 2 ); - 
rowLengths_1.setElement( 2, 1 ); - rowLengths_1.setElement( 3, 1 ); - m_1.setCompressedRowLengths( rowLengths_1 ); - - RealType value_1 = 1; - m_1.setElement( 0, 0, value_1++ ); // 0th row - - m_1.setElement( 1, 1, value_1++ ); // 1st row - m_1.setElement( 1, 3, value_1++ ); - - m_1.setElement( 2, 1, value_1++ ); // 2nd row - - m_1.setElement( 3, 2, value_1++ ); // 3rd row - - VectorType inVector_1; - inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) - inVector_1.setElement( i, 2 ); - - VectorType outVector_1; - outVector_1.setSize( m_rows_1 ); - for( IndexType j = 0; j < outVector_1.getSize(); j++ ) - outVector_1.setElement( j, 0 ); - + Matrix m_1( m_rows_1, m_cols_1, { + { 0, 0, 1 }, + { 1, 1, 2 }, + { 2, 1, 3 }, + { 3, 1, 4 }, { 3, 2, 5 } } ); + VectorType inVector_1( m_cols, 2 ); + VectorType outVector_1( m_rows, 1 ); m_1.vectorProduct( inVector_1, outVector_1 ); - EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); - EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); - EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); - EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 18 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 16 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 18 ); - /* + /** * Sets up the following 4x4 sparse matrix: * * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * \ 0 8 0 0 / + * | 2 0 6 8 | + * | 3 6 7 0 | + * \ 0 8 0 9 / */ const IndexType m_rows_2 = 4; const IndexType m_cols_2 = 4; - Matrix m_2( m_rows_2, m_cols_2 ); - typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; - /*rowLengths_2 = 3; - rowLengths_2.setElement( 1, 1 ); - rowLengths_2.setElement( 3, 1 );*/ - m_2.setCompressedRowLengths( rowLengths_2 ); - - RealType value_2 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_2.setElement( 0, i, value_2++ ); - - m_2.setElement( 1, 3, value_2++ ); // 1st row - - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_2.setElement( 2, i, value_2++ ); - - for( IndexType 
i = 1; i < 2; i++ ) // 3rd row - m_2.setElement( 3, i, value_2++ ); - - VectorType inVector_2; - inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i < inVector_2.getSize(); i++ ) - inVector_2.setElement( i, 2 ); - - VectorType outVector_2; - outVector_2.setSize( m_rows_2 ); - for( IndexType j = 0; j < outVector_2.getSize(); j++ ) - outVector_2.setElement( j, 0 ); + Matrix m_2( m_rows_2, m_cols_2, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, + { 1, 0, 2 }, { 1, 2, 6 }, { 1, 3, 8 }, + { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 }, + { 3, 2, 8 }, { 3, 3, 9 } } ); + VectorType inVector_2( m_cols_2, 2 ); + VectorType outVector_2( m_rows_2, 0 ); m_2.vectorProduct( inVector_2, outVector_2 ); EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); - EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); - EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 32 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 32 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 34 ); /* * Sets up the following 4x4 sparse matrix: * - * / 1 2 3 0 \ - * | 0 4 5 6 | - * | 7 8 9 0 | - * \ 0 10 11 12 / + * / 1 2 3 4 \ + * | 2 5 0 0 | + * | 3 0 6 0 | + * \ 4 0 0 7 / */ const IndexType m_rows_3 = 4; const IndexType m_cols_3 = 4; - Matrix m_3( m_rows_3, m_cols_3 ); - typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; - m_3.setCompressedRowLengths( rowLengths_3 ); - - RealType value_3 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_3.setElement( 0, i, value_3++ ); - - for( IndexType i = 1; i < 4; i++ ) - m_3.setElement( 1, i, value_3++ ); // 1st row - - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_3.setElement( 2, i, value_3++ ); - - for( IndexType i = 1; i < 4; i++ ) // 3rd row - m_3.setElement( 3, i, value_3++ ); - - VectorType inVector_3; - inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) - inVector_3.setElement( i, 2 ); - - VectorType outVector_3; - outVector_3.setSize( 
m_rows_3 ); - for( IndexType j = 0; j < outVector_3.getSize(); j++ ) - outVector_3.setElement( j, 0 ); + Matrix m_3( m_rows_3, m_cols_3, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, { 0, 3, 4 }, + { 1, 0, 2 }, { 1, 1, 5 }, + { 2, 0, 3 }, { 2, 2, 6 }, + { 3, 0, 4 }, { 3, 3, 7 } + } ); + VectorType inVector_3( { 0, 1, 2, 3 } ); + VectorType outVector_3( m_rows_3, 0 ); m_3.vectorProduct( inVector_3, outVector_3 ); - EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); - EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); - EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + EXPECT_EQ( outVector_3.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 5 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 21 ); /* * Sets up the following 8x8 sparse matrix: * - * / 1 2 3 0 0 4 0 0 \ - * | 0 5 6 7 8 0 0 0 | - * | 9 10 11 12 13 0 0 0 | - * | 0 14 15 16 17 0 0 0 | - * | 0 0 18 19 20 21 0 0 | - * | 0 0 0 22 23 24 25 0 | - * | 26 27 28 29 30 0 0 0 | - * \ 31 32 33 34 35 0 0 0 / + * / 1 0 3 0 9 0 15 0 \ + * | 0 2 0 6 0 12 0 19 | + * | 3 0 5 0 10 0 16 0 | + * | 0 6 0 8 0 13 0 20 | + * | 9 0 10 0 11 0 17 0 | + * | 0 12 0 13 0 14 0 21 | + * | 15 0 16 0 17 0 18 0 | + * \ 0 19 0 20 0 21 0 22 / */ const IndexType m_rows_4 = 8; const IndexType m_cols_4 = 8; - Matrix m_4( m_rows_4, m_cols_4 ); - typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; - /*rowLengths_4.setSize( m_rows_4 ); - rowLengths_4.setValue( 4 ); - rowLengths_4.setElement( 2, 5 ); - rowLengths_4.setElement( 6, 5 ); - rowLengths_4.setElement( 7, 5 );*/ - m_4.setCompressedRowLengths( rowLengths_4 ); - - RealType value_4 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_4.setElement( 0, i, value_4++ ); - - m_4.setElement( 0, 5, value_4++ ); - - for( IndexType i = 1; i < 5; i++ ) // 1st row - m_4.setElement( 1, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 2nd row - m_4.setElement( 2, i, 
value_4++ ); - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_4.setElement( 3, i, value_4++ ); - - for( IndexType i = 2; i < 6; i++ ) // 4th row - m_4.setElement( 4, i, value_4++ ); - - for( IndexType i = 3; i < 7; i++ ) // 5th row - m_4.setElement( 5, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 6th row - m_4.setElement( 6, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 7th row - m_4.setElement( 7, i, value_4++ ); - - VectorType inVector_4; - inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) - inVector_4.setElement( i, 2 ); - - VectorType outVector_4; - outVector_4.setSize( m_rows_4 ); - for( IndexType j = 0; j < outVector_4.getSize(); j++ ) - outVector_4.setElement( j, 0 ); + Matrix m_4( m_rows_4, m_cols_4, { + { 0, 0, 1 }, + { 1, 1, 2 }, + { 2, 0, 3 }, { 2, 2, 5 }, + { 3, 1, 6 }, { 3, 3, 8 }, + { 4, 0, 9 }, { 4, 2, 10 }, { 4, 4, 11 }, + { 5, 1, 12 }, { 5, 3, 13 }, { 5, 5, 14 }, + { 6, 0, 15 }, { 6, 2, 16 }, { 6, 4, 17 }, { 6, 6, 18 }, + { 7, 1, 19 }, { 7, 3, 20 }, { 7, 5, 21 }, { 7, 7, 22 } + } ); + VectorType inVector_4 { 1, 2, 1, 2, 1, 2, 1, 2 }; + VectorType outVector_4( m_rows_4, 0 ); m_4.vectorProduct( inVector_4, outVector_4 ); - EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); - EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); - EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); - EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); - EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); - EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); - EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); - EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); - + EXPECT_EQ( outVector_4.getElement( 0 ), 28 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 78 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 34 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 94 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 47 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 120 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 66 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 164 ); 
+############################################ /* * Sets up the following 8x8 sparse matrix: * -- GitLab From e841b7d7e2c30e3acf3bc87578a1d3370c0cd22c Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Feb 2020 10:58:19 +0100 Subject: [PATCH 139/179] Fixed save and load in sparse and dense matrix. --- src/TNL/Matrices/Dense.hpp | 2 ++ src/TNL/Matrices/DenseMatrixView.hpp | 1 + src/TNL/Matrices/SparseMatrix.hpp | 1 + src/UnitTests/Matrices/DenseMatrixTest.h | 2 +- src/UnitTests/Matrices/SparseMatrixTest.h | 10 ++++------ 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 91a98e7f9..0d7037b1f 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -1128,6 +1128,8 @@ template< typename Real, void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); + this->segments.load( file ); + this->view = this->getView(); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index a11ff263c..00ca5edc2 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -920,6 +920,7 @@ template< typename Real, void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const { MatrixView< Real, Device, Index >::save( file ); + this->segments.save( file ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index df841230c..03273c98b 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -858,6 +858,7 @@ load( File& file ) Matrix< RealType, DeviceType, IndexType >::load( file ); file >> this->columnIndexes; this->segments.load( file ); + this->view = this->getView(); } template< typename Real, diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 
a3e7e8f61..37ae58bf1 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -1234,7 +1234,7 @@ void test_SaveAndLoad() ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - Matrix loadedMatrix( rows, cols ); + Matrix loadedMatrix; ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 72eb0b33b..45dc40578 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -48,17 +48,17 @@ void test_Constructors() EXPECT_EQ( m1.getColumns(), 6 ); Matrix m2( {1, 2, 2, 2, 1 }, 5 ); - typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; + typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; m2.setElement( 0, 0, 1 ); // 0th row m2.setElement( 1, 0, 1 ); // 1st row - m2.setElement( 1, 1, 1 ); + m2.setElement( 1, 1, 1 ); m2.setElement( 2, 1, 1 ); // 2nd row m2.setElement( 2, 2, 1 ); m2.setElement( 3, 2, 1 ); // 3rd row m2.setElement( 3, 3, 1 ); m2.setElement( 4, 4, 1 ); // 4th row m2.getCompressedRowLengths( v1 ); - + EXPECT_EQ( v1, v2 ); /* @@ -1297,9 +1297,7 @@ void test_SaveAndLoad( const char* filename ) ASSERT_NO_THROW( savedMatrix.save( filename ) ); - Matrix loadedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows, 3 ); - loadedMatrix.setCompressedRowLengths( rowLengths2 ); + Matrix loadedMatrix; ASSERT_NO_THROW( loadedMatrix.load( filename ) ); -- GitLab From 058f8ae9654fe8ccf3783cd1976a1a4c5bb95aa3 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Feb 2020 10:59:04 +0100 Subject: [PATCH 140/179] Implementing unit tests for symmetric sparse matrix. 
--- .../Matrices/SymmetricSparseMatrixTest.h | 351 ++++++++---------- 1 file changed, 148 insertions(+), 203 deletions(-) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index 1beddaf7e..8593ff3f1 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -569,7 +569,7 @@ void test_AddElement() const IndexType cols = 5; Matrix m( 6, 5, { - { 0, 0, 1 }, + { 0, 0, 1 }, { 1, 0, 2 }, { 1, 1, 3 }, { 2, 1, 4 }, { 2, 2, 5 }, { 3, 2, 6 }, { 3, 3, 7 }, @@ -796,79 +796,45 @@ void test_VectorProduct() EXPECT_EQ( outVector_4.getElement( 6 ), 66 ); EXPECT_EQ( outVector_4.getElement( 7 ), 164 ); -############################################ /* * Sets up the following 8x8 sparse matrix: * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 + * / 1 0 0 0 0 0 0 0 \ + * | 0 2 0 0 0 0 0 0 | + * | 0 0 3 4 6 9 0 0 | + * | 0 0 4 5 7 10 0 0 | + * | 0 0 6 7 8 11 0 0 | + * | 0 0 9 10 11 12 0 0 | + * | 0 0 0 0 0 0 13 0 | + * \ 0 0 0 0 0 0 0 14 / */ const IndexType m_rows_5 = 8; const IndexType m_cols_5 = 8; - Matrix m_5( m_rows_5, m_cols_5 ); - typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m_5.setCompressedRowLengths( rowLengths_5 ); - - RealType value_5 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_5.setElement( 0, i, value_5++ ); - - m_5.setElement( 0, 4, value_5++ ); // 0th row - m_5.setElement( 0, 5, value_5++ ); - - m_5.setElement( 1, 1, value_5++ ); // 1st row - m_5.setElement( 1, 3, value_5++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_5.setElement( 2, i, value_5++ ); - - m_5.setElement( 2, 4, value_5++ ); // 2nd row - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_5.setElement( 3, i, value_5++ ); - - 
m_5.setElement( 4, 1, value_5++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_5.setElement( 5, i, value_5++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_5.setElement( 6, i, value_5++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_5.setElement( 7, i, value_5++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m_5.setElement( i, 7, 1); - - VectorType inVector_5; - inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) - inVector_5.setElement( i, 2 ); - - VectorType outVector_5; - outVector_5.setSize( m_rows_5 ); - for( IndexType j = 0; j < outVector_5.getSize(); j++ ) - outVector_5.setElement( j, 0 ); + Matrix m_5( m_rows_5, m_cols_5,{ + { 0, 0, 1 }, + { 1, 1, 2, }, + { 2, 2, 3 }, { 2, 3, 4 }, { 2, 4, 6 }, { 2, 5, 9 }, + { 3, 2, 4 }, { 3, 3, 5 }, { 3, 4, 7 }, { 3, 5, 10 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 4, 5, 11 }, + { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, + { 6, 6, 13 }, + { 7, 7, 14 } + } ); + VectorType inVector_5( m_cols_5, { 1, 2, 3, 4, 5, 6, 7, 8 } ); + VectorType outVector_5( m_rows_5, 0.0 ); m_5.vectorProduct( inVector_5, outVector_5 ); - EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); - EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); - EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); - EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); - EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); - EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); + EXPECT_EQ( outVector_5.getElement( 0 ), 1*1 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 2*2 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 3*3 + 4*4 + 5*6 + 6*9 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 3*4 + 4*5 + 5*7 + 6*10 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 3*6 + 4*7 + 5*8 + 6*11 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 3*9 + 4*10 + 5*11 + 6*12 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 7*13 ); + 
EXPECT_EQ( outVector_5.getElement( 7 ), 8*14 ); } template< typename Matrix > @@ -881,55 +847,29 @@ void test_RowsReduction() /* * Sets up the following 8x8 sparse matrix: * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 + * / 1 0 0 0 0 0 0 0 \ + * | 0 2 0 0 0 0 0 0 | + * | 0 0 3 4 6 9 0 0 | + * | 0 0 4 5 7 10 0 0 | + * | 0 0 6 7 8 11 0 0 | + * | 0 0 9 10 11 12 0 0 | + * | 0 0 0 0 0 0 13 0 | + * \ 0 0 0 0 0 0 0 14 / */ - const IndexType rows = 8; - const IndexType cols = 8; - - Matrix m; - m.setDimensions( rows, cols ); - typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m.setCompressedRowLengths( rowsCapacities ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - m.setElement( 0, 4, value++ ); // 0th row - m.setElement( 0, 5, value++ ); - - m.setElement( 1, 1, value++ ); // 1st row - m.setElement( 1, 3, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - m.setElement( 2, 4, value++ ); // 2nd row - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - m.setElement( 4, 1, value++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m.setElement( 7, i, value++ ); + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m.setElement( i, 7, 1); + Matrix m_5( m_rows_5, m_cols_5,{ + { 0, 0, 1 }, + { 1, 1, 2, }, + { 2, 2, 3 }, { 2, 3, 4 }, { 2, 4, 6 }, { 2, 5, 9 }, + { 3, 2, 4 }, { 3, 3, 5 }, { 3, 4, 7 }, { 3, 5, 10 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 4, 5, 11 }, + { 5, 
2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, + { 6, 6, 13 }, + { 7, 7, 14 } + } ); //// // Compute number of non-zero elements in rows. @@ -986,24 +926,13 @@ void test_PerformSORIteration() const IndexType m_rows = 4; const IndexType m_cols = 4; - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; - m.setCompressedRowLengths( rowLengths ); - - m.setElement( 0, 0, 4.0 ); // 0th row - m.setElement( 0, 1, 1.0); - - m.setElement( 1, 0, 1.0 ); // 1st row - m.setElement( 1, 1, 4.0 ); - m.setElement( 1, 2, 1.0 ); - - m.setElement( 2, 1, 1.0 ); // 2nd row - m.setElement( 2, 2, 4.0 ); - m.setElement( 2, 3, 1.0 ); - - m.setElement( 3, 2, 1.0 ); // 3rd row - m.setElement( 3, 3, 4.0 ); + Matrix m( m_rows, m_cols, { + { 0, 0, 4 }, { 0, 1, 1 }, + { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 }, + { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 }, + { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 }, + { 4, 3, 1 }, { 4, 4, 4 } + } ); RealType bVector [ 4 ] = { 1, 1, 1, 1 }; RealType xVector [ 4 ] = { 1, 1, 1, 1 }; @@ -1048,41 +977,70 @@ void test_SaveAndLoad( const char* filename ) using IndexType = typename Matrix::IndexType; /* - * Sets up the following 4x4 sparse matrix: + * Sets up the following 6x5 sparse matrix: * - * / 1 2 3 0 \ - * | 0 4 0 5 | - * | 6 7 8 0 | - * \ 0 9 10 11 / + * / 1 2 0 0 0 \ + * | 2 3 4 0 0 | + * | 0 4 5 6 0 | + * | 0 0 6 7 8 | + * | 0 0 0 8 9 | + * \ 0 0 0 0 10 / */ - const IndexType m_rows = 4; - const IndexType m_cols = 4; + const IndexType rows = 6; + const IndexType cols = 5; - Matrix savedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; - savedMatrix.setCompressedRowLengths( rowLengths ); + Matrix savedMatrix( 6, 5, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 1, 4 }, { 2, 2, 5 }, + { 3, 2, 6 }, { 3, 3, 7 }, + { 4, 3, 8 }, { 4, 4, 9 }, + { 5, 5, 10 } } ); - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - 
savedMatrix.setElement( 0, i, value++ ); + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - savedMatrix.setElement( 1, 1, value++ ); - savedMatrix.setElement( 1, 3, value++ ); // 1st row + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - savedMatrix.setElement( 2, i, value++ ); + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 4 ); + EXPECT_EQ( m.getElement( 2, 2 ), 5 ); + EXPECT_EQ( m.getElement( 2, 3 ), 6 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - savedMatrix.setElement( 3, i, value++ ); + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 6 ); + EXPECT_EQ( m.getElement( 3, 3 ), 7 ); + EXPECT_EQ( m.getElement( 3, 4 ), 8 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 8 ); + EXPECT_EQ( m.getElement( 4, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 10 ); ASSERT_NO_THROW( savedMatrix.save( filename ) ); - Matrix loadedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); - rowLengths2 = 3; - loadedMatrix.setCompressedRowLengths( rowLengths2 ); + Matrix loadedMatrix; + //typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); + //rowLengths2 = 3; + //loadedMatrix.setCompressedRowLengths( rowLengths2 ); ASSERT_NO_THROW( 
loadedMatrix.load( filename ) ); @@ -1090,42 +1048,43 @@ void test_SaveAndLoad( const char* filename ) EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 5 ), loadedMatrix.getElement( 0, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 5 ), loadedMatrix.getElement( 1, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 5 ), loadedMatrix.getElement( 2, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 
), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); - + EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 5 ), loadedMatrix.getElement( 3, 5 ) ); + + EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 5 ), loadedMatrix.getElement( 4, 5 ) ); + + EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 5 ), loadedMatrix.getElement( 5, 5 ) ); EXPECT_EQ( std::remove( filename ), 0 ); } @@ -1137,37 +1096,24 @@ void test_Print() using IndexType = typename Matrix::IndexType; /* - * Sets up the following 5x4 sparse matrix: + * Sets up the following 4x4 sparse matrix: * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * | 0 8 9 10 | - * \ 0 0 11 12 / 
+ * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / */ - const IndexType m_rows = 5; + const IndexType m_rows = 4; const IndexType m_cols = 4; - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows ); - rowLengths = 3; - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - m.setElement( 1, 3, value++ ); // 1st row - - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - for( IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, value++ ); + Matrix m( m_rows, m_cols, { + { 0, 0, 4 }, { 0, 1, 1 }, + { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 }, + { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 }, + { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 }, + { 4, 3, 1 }, { 4, 4, 4 } + } ); std::stringstream printed; std::stringstream couted; @@ -1179,11 +1125,10 @@ void test_Print() std::cout.rdbuf(old_buf); //reset - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" - "Row: 1 -> Col:3->4\t\n" - "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" - "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" - "Row: 4 -> Col:2->11 Col:3->12\t\n"; + couted << "Row: 0 -> Col:0->4 Col:1->1\t\n" + "Row: 1 -> Col:0->1 Col:1->4 Col:2->1\t\n" + "Row: 2 -> Col:1->1 Col:2->4 Col:3->1\t\n" + "Row: 3 -> Col:2->1 Col:3->4\t\n"; EXPECT_EQ( printed.str(), couted.str() ); } -- GitLab From c5c27c5d7f4207afbea7726662f61d6ad7af97fa Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Feb 2020 11:11:57 +0100 Subject: [PATCH 141/179] Fixing binary sparse matrix unit tests. 
--- .../Matrices/BinarySparseMatrixTest.hpp | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp index 8b3d8f833..276c432ff 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -1078,9 +1078,7 @@ void test_SaveAndLoad( const char* filename ) const IndexType m_cols = 4; Matrix savedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); savedMatrix.setCompressedRowLengths( rowLengths ); for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row @@ -1098,14 +1096,6 @@ void test_SaveAndLoad( const char* filename ) ASSERT_NO_THROW( savedMatrix.save( filename ) ); Matrix loadedMatrix; - loadedMatrix.reset(); - loadedMatrix.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths2; - rowLengths2.setSize( m_rows ); - rowLengths2.setValue( 3 ); - loadedMatrix.setCompressedRowLengths( rowLengths2 ); - - ASSERT_NO_THROW( loadedMatrix.load( filename ) ); @@ -1172,12 +1162,8 @@ void test_Print() const IndexType m_rows = 5; const IndexType m_cols = 4; - Matrix m; - m.reset(); - m.setDimensions( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths; - rowLengths.setSize( m_rows ); - rowLengths.setValue( 3 ); + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); m.setCompressedRowLengths( rowLengths ); RealType value = 1; -- GitLab From 95966e9cb67f9558409db4ff145c0b5d4d90c0d4 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Feb 2020 11:25:33 +0100 Subject: [PATCH 142/179] Adding copyright header to sparse matric unit tests source files. 
--- src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp | 10 ++++++++++ src/UnitTests/Matrices/SparseMatrixTest_CSR.cu | 10 ++++++++++ src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp | 10 ++++++++++ src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu | 10 ++++++++++ .../Matrices/SparseMatrixTest_SlicedEllpack.cpp | 10 ++++++++++ .../Matrices/SparseMatrixTest_SlicedEllpack.cu | 10 ++++++++++ ...parseMatrixTest.h => SymmetricSparseMatrixTest.hpp} | 0 7 files changed, 60 insertions(+) rename src/UnitTests/Matrices/{SymmetricSparseMatrixTest.h => SymmetricSparseMatrixTest.hpp} (100%) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp index 258ad2c53..5830658ab 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_CSR.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu index 258ad2c53..91f0de81a 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_CSR.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp index c454706f0..3c30c54c5 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu index c454706f0..9a27cece6 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp index 40e2e94b8..2c79ee502 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu index 40e2e94b8..bff81d9a3 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp similarity index 100% rename from src/UnitTests/Matrices/SymmetricSparseMatrixTest.h rename to src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp -- GitLab From db8ab1ee0491535ac3e13322d33b4d2e84e7c4f3 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 11 Feb 2020 11:28:37 +0100 Subject: [PATCH 143/179] Added symmetric sparse matrix unit tests for CSR format. --- src/UnitTests/Matrices/CMakeLists.txt | 8 ++ .../Matrices/SymmetricSparseMatrixTest.h | 114 ++++++++++++++++++ .../SymmetricSparseMatrixTest_CSR.cpp | 11 ++ .../Matrices/SymmetricSparseMatrixTest_CSR.cu | 11 ++ .../Matrices/SymmetricSparseMatrixTest_CSR.h | 61 ++++++++++ 5 files changed, 205 insertions(+) create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest.h create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu create mode 100644 src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 8da67ef6b..1c536a982 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -37,6 +37,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( DenseMatrixTest 
DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -86,6 +89,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( SymmetricSparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -100,6 +107,7 @@ ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatri ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h new file mode 100644 index 000000000..659f555c3 --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -0,0 +1,114 @@ +/*************************************************************************** + SymmetricSparseMatrixTest.h - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include +#include +#include +#include "SparseMatrixTest.hpp" + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +TYPED_TEST_SUITE( MatrixTest, MatrixTypes); + +TYPED_TEST( MatrixTest, Constructors ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Constructors< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setCompressedRowLengthsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetCompressedRowLengths< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetRow< MatrixType >(); +} + + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +TYPED_TEST( MatrixTest, rowsReduction ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_RowsReduction< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename 
TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >( saveAndLoadTestFileName ); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp new file mode 100644 index 000000000..c23fa4242 --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.cpp - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SymmetricSparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu new file mode 100644 index 000000000..df1d83da0 --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.cu - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SymmetricSparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h new file mode 100644 index 000000000..bbb6c66cb --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h @@ -0,0 +1,61 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.h - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include +#include +#include + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, 
TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, 
TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > +#endif // HAVE_CUDA +>; + +const char* saveAndLoadTestFileName "test_SymmetricSparseMatrixTest_CSR_segments"; + +#include "SparseMatrixTest.h" + +#endif // HAVE_GTEST + +#include "../main.h" -- GitLab From 6f43c59aac4b6dcdb4cdb6894b775c2ed24a9f40 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 12 Feb 2020 13:19:34 +0100 Subject: [PATCH 144/179] Fixing symmetric sparse matrix unit tests. --- .../Matrices/SymmetricSparseMatrixTest.h | 11 +-- .../Matrices/SymmetricSparseMatrixTest.hpp | 98 ++++++++++--------- .../Matrices/SymmetricSparseMatrixTest_CSR.h | 16 +-- 3 files changed, 59 insertions(+), 66 deletions(-) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index 659f555c3..5582b138d 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -12,7 +12,7 @@ #include #include #include -#include "SparseMatrixTest.hpp" +#include "SymmetricSparseMatrixTest.hpp" // test fixture for typed tests template< typename Matrix > @@ -24,13 +24,6 @@ protected: TYPED_TEST_SUITE( MatrixTest, MatrixTypes); -TYPED_TEST( MatrixTest, Constructors ) -{ - using MatrixType = typename TestFixture::MatrixType; - - test_Constructors< MatrixType >(); -} - TYPED_TEST( MatrixTest, setDimensionsTest ) { using MatrixType = typename TestFixture::MatrixType; @@ -110,5 +103,3 @@ TYPED_TEST( MatrixTest, printTest ) } #endif - -#include "../main.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index 8593ff3f1..75b121060 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ 
b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -177,13 +177,13 @@ void test_GetNumberOfNonzeroMatrixElements() { 0, 0, 1 }, { 1, 0, 2 }, { 1, 1, 3 }, { 2, 0, 4 }, { 2, 1, 5 }, { 2, 2, 6 }, - { 3, 0, 7 }, { 3, 1, 8 }, , { 3, 3, 9 }, + { 3, 0, 7 }, { 3, 1, 8 }, { 3, 3, 9 }, { 4, 1, 10 }, { 4, 2, 11 }, { 4, 4, 12 }, { 5, 1, 13 }, { 5, 2, 14 }, { 5, 5, 15 }, { 6, 1, 16 }, { 6, 3, 17 }, { 6, 6, 18 }, { 7, 1, 19 }, { 7, 3, 20 }, { 7, 7, 21 }, { 8, 2, 22 }, { 8, 4, 23 }, { 8, 8, 24 }, - { 9, 2, 25 }, { 9, 4, 26 }, { 9, 9, 27 } + { 9, 2, 25 }, { 9, 4, 26 }, { 9, 9, 27 }, { 10, 2, 28 }, { 10, 4, 29 }, { 10, 10, 30 } } ); @@ -260,7 +260,7 @@ void test_GetRow() case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break; } }; - TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f ); EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); @@ -692,8 +692,8 @@ void test_VectorProduct() { 2, 1, 3 }, { 3, 1, 4 }, { 3, 2, 5 } } ); - VectorType inVector_1( m_cols, 2 ); - VectorType outVector_1( m_rows, 1 ); + VectorType inVector_1( m_cols_1, 2.0 ); + VectorType outVector_1( m_rows_1, 0.0 ); m_1.vectorProduct( inVector_1, outVector_1 ); EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); @@ -823,7 +823,7 @@ void test_VectorProduct() { 7, 7, 14 } } ); - VectorType inVector_5( m_cols_5, { 1, 2, 3, 4, 5, 6, 7, 8 } ); + VectorType inVector_5( { 1, 2, 3, 4, 5, 6, 7, 8 } ); VectorType outVector_5( m_rows_5, 0.0 ); m_5.vectorProduct( inVector_5, outVector_5 ); @@ -873,7 +873,8 @@ void test_RowsReduction() //// // Compute number of non-zero elements in rows. 
- typename Matrix::RowsCapacitiesType rowLengths( rows ); + typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 ); + typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } ); auto rowLengths_view = rowLengths.getView(); auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return ( value != 0.0 ); @@ -884,14 +885,15 @@ void test_RowsReduction() auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { rowLengths_view[ rowIdx ] = value; }; - m.allRowsReduction( fetch, reduce, keep, 0 ); - EXPECT_EQ( rowsCapacities, rowLengths ); - m.getCompressedRowLengths( rowLengths ); - EXPECT_EQ( rowsCapacities, rowLengths ); + m_5.allRowsReduction( fetch, reduce, keep, 0 ); + + EXPECT_EQ( rowLengths_true, rowLengths ); + m_5.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowLengths_true, rowLengths ); //// // Compute max norm - TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() ); auto rowSums_view = rowSums.getView(); auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return abs( value ); @@ -902,7 +904,7 @@ void test_RowsReduction() auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { rowSums_view[ rowIdx ] = value; }; - m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); const RealType maxNorm = TNL::max( rowSums ); EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 } @@ -999,41 +1001,41 @@ void test_SaveAndLoad( const char* filename ) { 5, 5, 10 } } ); // Check the set elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 0 ); - EXPECT_EQ( m.getElement( 0, 3 
), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 2 ); - EXPECT_EQ( m.getElement( 1, 1 ), 3 ); - EXPECT_EQ( m.getElement( 1, 2 ), 4 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 4 ); - EXPECT_EQ( m.getElement( 2, 2 ), 5 ); - EXPECT_EQ( m.getElement( 2, 3 ), 6 ); - EXPECT_EQ( m.getElement( 2, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 0 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 6 ); - EXPECT_EQ( m.getElement( 3, 3 ), 7 ); - EXPECT_EQ( m.getElement( 3, 4 ), 8 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 8 ); - EXPECT_EQ( m.getElement( 4, 4 ), 9 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 6 ); + EXPECT_EQ( 
savedMatrix.getElement( 3, 3 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 3, 4 ), 8 ); + + EXPECT_EQ( savedMatrix.getElement( 4, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 3 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 4, 4 ), 9 ); + + EXPECT_EQ( savedMatrix.getElement( 5, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 4 ), 10 ); ASSERT_NO_THROW( savedMatrix.save( filename ) ); diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h index bbb6c66cb..45dd5e5b9 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h @@ -14,12 +14,12 @@ #include // test fixture for typed tests -template< typename Matrix > -class MatrixTest : public ::testing::Test -{ -protected: - using MatrixType = Matrix; -}; +//template< typename Matrix > +//class MatrixTest : public ::testing::Test +//{ +//protected: +// using MatrixType = Matrix; +//}; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types @@ -52,9 +52,9 @@ using MatrixTypes = ::testing::Types #endif // HAVE_CUDA >; -const char* saveAndLoadTestFileName "test_SymmetricSparseMatrixTest_CSR_segments"; +const char* saveAndLoadTestFileName = "test_SymmetricSparseMatrixTest_CSR_segments"; -#include "SparseMatrixTest.h" +#include "SymmetricSparseMatrixTest.h" #endif // HAVE_GTEST -- GitLab From 537a8805d134831e757cb919a37e21d9883cf057 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 13 Feb 2020 18:35:22 +0100 Subject: [PATCH 145/179] Implementing symmetric sparse matrix. 
--- src/TNL/Matrices/SparseMatrix.hpp | 17 +++ src/TNL/Matrices/SparseMatrixView.h | 4 +- src/TNL/Matrices/SparseMatrixView.hpp | 101 ++++++++++++++---- .../Matrices/SymmetricSparseMatrixTest.h | 7 ++ .../Matrices/SymmetricSparseMatrixTest.hpp | 37 +++++-- 5 files changed, 132 insertions(+), 34 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 03273c98b..3eccc7211 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -11,6 +11,7 @@ #pragma once #include +#include #include #include @@ -107,11 +108,27 @@ SparseMatrix( const IndexType rows, { Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 ); for( const auto& i : data ) + { + if( std::get< 0 >( i ) >= rows ) + { + std::stringstream s; + s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } rowCapacities[ std::get< 0 >( i ) ]++; + } SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); hostMatrix.setCompressedRowLengths( rowCapacities ); for( const auto& i : data ) + { + if( std::get< 1 >( i ) >= columns ) + { + std::stringstream s; + s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); + } ( *this ) = hostMatrix; } diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 2756c80d7..54d4f1766 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -98,8 +98,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > const IndexType column, const RealType& value ); - void addElement( const IndexType row, - const IndexType column, + void addElement( IndexType row, + IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); diff --git 
a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 16a8bc62f..62300217f 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -153,10 +153,36 @@ getNumberOfNonzeroMatrixElements() const { const auto columns_view = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); - auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { - return ( columns_view[ i ] != paddingIndex ); - }; - return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + if( ! isSymmetric() ) + { + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( columns_view[ i ] != paddingIndex ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + } + else + { + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 ); + auto row_sums_view = row_sums.getView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + const IndexType column = columnIndexesView[ globalIdx ]; + compute = ( column != paddingIndex ); + if( ! 
compute ) + return 0.0; + return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + row_sums_view[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + return sum( row_sums ); + } } template< typename Real, @@ -206,8 +232,8 @@ template< typename Real, template< typename, typename > class SegmentsView > void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -addElement( const IndexType row, - const IndexType column, +addElement( IndexType row, + IndexType column, const RealType& value, const RealType& thisElementMultiplicator ) { @@ -216,6 +242,13 @@ addElement( const IndexType row, TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." ); TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." ); + if( isSymmetric() && row < column ) + { + swap( row, column ); + TNL_ASSERT_LT( row, this->getRows(), "Column index is out of the symmetric part of the matrix after transposition." ); + TNL_ASSERT_LT( column,this->getColumns(), "Row index is out of the symmetric part of the matrix after transposition." ); + } + const IndexType rowSize = this->segments.getSegmentSize( row ); IndexType col( this->getPaddingIndex() ); IndexType i; @@ -276,14 +309,21 @@ template< typename Real, template< typename, typename > class SegmentsView > Real SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: -getElement( const IndexType row, - const IndexType column ) const +getElement( IndexType row, + IndexType column ) const { TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." 
); TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." ); TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." ); TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." ); + if( isSymmetric() && row < column ) + { + swap( row, column ); + if( row >= this->getRows() || column >= this->getColumns() ) + return 0.0; + } + const IndexType rowSize = this->segments.getSegmentSize( row ); for( IndexType i = 0; i < rowSize; i++ ) { @@ -588,25 +628,40 @@ void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: print( std::ostream& str ) const { - for( IndexType row = 0; row < this->getRows(); row++ ) + if( isSymmetric() ) { - str <<"Row: " << row << " -> "; - const IndexType rowLength = this->segments.getSegmentSize( row ); - for( IndexType i = 0; i < rowLength; i++ ) + for( IndexType row = 0; row < this->getRows(); row++ ) { - const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); - const IndexType column = this->columnIndexes.getElement( globalIdx ); - if( column == this->getPaddingIndex() ) - break; - RealType value; - if( isBinary() ) - value = 1.0; - else - value = this->values.getElement( globalIdx ); - str << " Col:" << column << "->" << value << "\t"; + str <<"Row: " << row << " -> "; + for( IndexType column = 0; column < this->getColumns(); column++ ) + { + auto value = this->getElement( row, column ); + if( value ) + str << " Col:" << column << "->" << value << "\t"; + } + str << std::endl; } - str << std::endl; } + else + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + const auto rowLength = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowLength; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + const IndexType column = this->columnIndexes.getElement( globalIdx ); + if( column == this->getPaddingIndex() ) + 
break; + RealType value; + if( isBinary() ) + value = ( RealType ) 1.0; + else + value = this->values.getElement( globalIdx ); + str << " Col:" << column << "->" << value << "\t"; + } + str << std::endl; + } } template< typename Real, diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index 5582b138d..02fd8c585 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -45,6 +45,13 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + TYPED_TEST( MatrixTest, resetTest ) { using MatrixType = typename TestFixture::MatrixType; diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index 75b121060..7c8278422 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -73,11 +73,11 @@ void test_SetCompressedRowLengths() | 24 25 26 | \ 27 28 30 / */ - const IndexType rows = 10; + const IndexType rows = 11; const IndexType cols = 11; Matrix m( rows, cols ); - typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; m.setCompressedRowLengths( rowLengths ); // Insert values into the rows. 
@@ -120,9 +120,25 @@ void test_SetCompressedRowLengths() m.setElement( 7, 3, value++ ); m.setElement( 7, 7, value++ ); + // 8th row - lower part + m.setElement( 8, 2, value++ ); + m.setElement( 8, 4, value++ ); + m.setElement( 8, 8, value++ ); + + // 9th row - lower part + m.setElement( 9, 2, value++ ); + m.setElement( 9, 4, value++ ); + m.setElement( 9, 9, value++ ); + + // 10th row - lower part + m.setElement( 10, 2, value++ ); + m.setElement( 10, 5, value++ ); + m.setElement( 10, 10, value++ ); + rowLengths = 0; m.getCompressedRowLengths( rowLengths ); - typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; EXPECT_EQ( rowLengths, correctRowLengths ); } @@ -170,8 +186,8 @@ void test_GetNumberOfNonzeroMatrixElements() 49 */ - const IndexType rows = 10; - const IndexType cols = 10; + const IndexType rows = 11; + const IndexType cols = 11; Matrix m( rows, cols, { { 0, 0, 1 }, @@ -276,7 +292,7 @@ void test_GetRow() EXPECT_EQ( m.getElement( 1, 0 ), 2 ); EXPECT_EQ( m.getElement( 1, 1 ), 3 ); - EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); EXPECT_EQ( m.getElement( 1, 3 ), 8 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); EXPECT_EQ( m.getElement( 1, 5 ), 13 ); @@ -421,7 +437,7 @@ void test_SetElement() Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 ); RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) + for( IndexType i = 0; i < 3; i++ ) m.setElement( i, i, value++ ); for( IndexType i = 0; i < 4; i++ ) @@ -574,7 +590,7 @@ void test_AddElement() { 2, 1, 4 }, { 2, 2, 5 }, { 3, 2, 6 }, { 3, 3, 7 }, { 4, 3, 8 }, { 4, 4, 9 }, - { 5, 5, 10 } } ); + { 5, 4, 10 } } ); // Check the set elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); @@ -626,9 +642,12 @@ void test_AddElement() */ for( IndexType i = 1; i < rows; i++ ) + { m.addElement( i, i - 1, 1.0, 2.0 ); + m.addElement( i, i, 0.0, 2.0 ); + } - + std::cerr << 
m << std::endl; EXPECT_EQ( m.getElement( 0, 0 ), 2 ); EXPECT_EQ( m.getElement( 0, 1 ), 5 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); -- GitLab From 4e62a8fff15e08c8935f1269bc755f381468c907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 24 Feb 2020 21:25:07 +0100 Subject: [PATCH 146/179] Fixing symmetric sparse matrix. --- src/TNL/Matrices/SparseMatrix.h | 2 +- src/TNL/Matrices/SparseMatrix.hpp | 4 ++-- src/TNL/Matrices/SparseMatrixView.h | 4 ++-- src/TNL/Matrices/SparseMatrixView.hpp | 18 +++++++++++++++--- .../Matrices/SymmetricSparseMatrixTest.hpp | 9 +++++---- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 573d382ce..81422eef3 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -145,7 +145,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator = 1.0, - const RealType& inVectorAddition = 0.0 ) const; + const RealType& outVectorMultiplicator = 0.0 ) const; /*template< typename Real2, typename Index2 > void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 3eccc7211..22ca56940 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -396,9 +396,9 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAll vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, - const RealType& inVectorAddition ) const + const RealType& outVectorMultiplicator ) const { - this->view.vectorProduct( inVector, outVector, matrixMultiplicator, inVectorAddition ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator ); /*TNL_ASSERT_EQ( this->getColumns(), 
inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index 54d4f1766..a611b5467 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -118,8 +118,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector, - const RealType& matrixMultiplicator = 1.0, - const RealType& inVectorAddition = 0.0 ) const; + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0 ) const; template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 62300217f..87f4f1038 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace TNL { namespace Matrices { @@ -367,8 +368,8 @@ void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: vectorProduct( const InVector& inVector, OutVector& outVector, - const RealType& matrixMultiplicator, - const RealType& inVectorAddition ) const + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." 
); @@ -378,11 +379,19 @@ vectorProduct( const InVector& inVector, const auto valuesView = this->values.getConstView(); const auto columnIndexesView = this->columnIndexes.getConstView(); const IndexType paddingIndex = this->getPaddingIndex(); + if( isSymmetric() ) + outVector *= outVectorMultiplicator; auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! compute ) return 0.0; + if( isSymmetric() ) + { + TNL_ASSERT_TRUE( false, "" ); + //Atomic< RealType, DeviceType > atomic; + //if( isBinary() ) + } if( isBinary() ) return inVectorView[ column ]; return valuesView[ globalIdx ] * inVectorView[ column ]; @@ -391,7 +400,10 @@ vectorProduct( const InVector& inVector, sum += value; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - outVectorView[ row ] = value; + if( outVectorMultiplicator == 0.0 ) + outVectorView[ row ] = matrixMultiplicator * value; + else + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index 7c8278422..193c1e031 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -641,13 +641,14 @@ void test_AddElement() * \ 0 0 0 0 10 / \ 0 0 0 0 1 / \ 0 0 0 0 21 / */ - for( IndexType i = 1; i < rows; i++ ) + for( IndexType i = 0; i < rows; i++ ) { - m.addElement( i, i - 1, 1.0, 2.0 ); - m.addElement( i, i, 0.0, 2.0 ); + if( i > 0 ) + m.addElement( i, i - 1, 1.0, 2.0 ); + if( i < cols ) + m.addElement( i, i, 0.0, 2.0 ); } - std::cerr << m << std::endl; EXPECT_EQ( m.getElement( 0, 0 ), 2 ); EXPECT_EQ( 
m.getElement( 0, 1 ), 5 ); EXPECT_EQ( m.getElement( 0, 2 ), 0 ); -- GitLab From 335dd0fc17225ea55af0d25a22f8b605fdaede58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Mon, 24 Feb 2020 21:32:38 +0100 Subject: [PATCH 147/179] Fixing sparse matrix to work with StaticVector as RealType. --- src/TNL/Matrices/SparseMatrixView.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 87f4f1038..cb6afd8e4 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -168,20 +168,20 @@ getNumberOfNonzeroMatrixElements() const Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 ); auto row_sums_view = row_sums.getView(); const auto columnIndexesView = this->columnIndexes.getConstView(); - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! 
compute ) return 0.0; return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements }; - auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) { sum += value; }; - auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable { row_sums_view[ row ] = value; }; - this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( IndexType ) 0 ); return sum( row_sums ); } } @@ -648,7 +648,7 @@ print( std::ostream& str ) const for( IndexType column = 0; column < this->getColumns(); column++ ) { auto value = this->getElement( row, column ); - if( value ) + if( value != ( RealType ) 0 ) str << " Col:" << column << "->" << value << "\t"; } str << std::endl; -- GitLab From 46ee01f39ef067724c2fb6c79e7662295f0570af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 25 Feb 2020 22:04:13 +0100 Subject: [PATCH 148/179] Fixes of SegmentsViewType and SegmentViewType. 
--- src/TNL/Containers/Segments/Ellpack.h | 2 +- src/TNL/Containers/Segments/EllpackView.h | 2 +- src/TNL/Containers/Segments/SlicedEllpack.h | 2 +- src/TNL/Containers/Segments/SlicedEllpackView.h | 2 +- src/TNL/Matrices/SparseMatrix.h | 1 + src/TNL/Matrices/SparseMatrixView.h | 5 ++--- src/TNL/Matrices/SparseMatrixView.hpp | 1 + 7 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index c197c7010..6edacb1cf 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -34,7 +34,7 @@ class Ellpack using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; template< typename Device_, typename Index_ > - using ViewTemplate = EllpackView< Device_, Index_ >; + using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 3870f0802..dcbc56d1b 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -35,7 +35,7 @@ class EllpackView using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; template< typename Device_, typename Index_ > - using ViewTemplate = EllpackView< Device_, Index_ >; + using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; using ViewType = EllpackView; using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 
5953cde36..e1cdfa1d4 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -35,7 +35,7 @@ class SlicedEllpack static constexpr bool getRowMajorOrder() { return RowMajorOrder; } using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; template< typename Device_, typename Index_ > - using ViewTemplate = SlicedEllpackView< Device_, Index_ >; + using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 2b310a805..23001553c 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -33,7 +33,7 @@ class SlicedEllpackView static constexpr int getSliceSize() { return SliceSize; } static constexpr bool getRowMajorOrder() { return RowMajorOrder; } template< typename Device_, typename Index_ > - using ViewTemplate = SlicedEllpackView< Device_, Index_ >; + using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; using ViewType = SlicedEllpackView; using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 81422eef3..e31d9d6ba 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -40,6 +40,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using SegmentsType = Segments< Device, Index, IndexAllocator >; template< typename Device_, typename Index_ > using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >; + using SegmentsViewType = typename 
SegmentsType::ViewType; using SegmentViewType = typename SegmentsType::SegmentViewType; using DeviceType = Device; using IndexType = Index; diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index a611b5467..4fa65b70a 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -51,7 +51,6 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - __cuda_callable__ SparseMatrixView(); @@ -103,8 +102,8 @@ class SparseMatrixView : public MatrixView< Real, Device, Index > const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); - RealType getElement( const IndexType row, - const IndexType column ) const; + RealType getElement( IndexType row, + IndexType column ) const; template< typename Vector > __cuda_callable__ diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index cb6afd8e4..4e5244806 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -209,6 +209,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: getRow( const IndexType& rowIdx ) -> RowView { TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + typename RowView::SegmentViewType t = this->segments.getSegmentView( rowIdx ); return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); } -- GitLab From 91b56fdcca41f3532f31812c801cdc00ea6678ce Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 26 Feb 2020 13:50:34 +0100 Subject: [PATCH 149/179] Added a method SparseMatrix::setElements. 
--- src/TNL/Matrices/SparseMatrix.h | 2 + src/TNL/Matrices/SparseMatrix.hpp | 64 +++++++++++++++++++------------ 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index e31d9d6ba..c7f953a8c 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -102,6 +102,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > this->setCompressedRowLengths( rowLengths ); }; + void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ); + template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 22ca56940..4c1f3b1ce 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -106,30 +106,7 @@ SparseMatrix( const IndexType rows, const IndexAllocatorType& indexAllocator ) : BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) { - Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 ); - for( const auto& i : data ) - { - if( std::get< 0 >( i ) >= rows ) - { - std::stringstream s; - s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list"; - throw std::logic_error( s.str() ); - } - rowCapacities[ std::get< 0 >( i ) ]++; - } - SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); - hostMatrix.setCompressedRowLengths( rowCapacities ); - for( const auto& i : data ) - { - if( std::get< 1 >( i ) >= columns ) - { - std::stringstream s; - s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list"; - throw std::logic_error( s.str() ); - } - hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); - } - ( *this ) = hostMatrix; + this->setElements( data ); } template< typename Real, @@ -231,6 +208,45 @@ setCompressedRowLengths( 
const RowsCapacitiesVector& rowsCapacities ) this->view = this->getView(); } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ) +{ + const auto& rows = this->getRows(); + const auto& columns = this->getColumns(); + Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 ); + for( const auto& i : data ) + { + if( std::get< 0 >( i ) >= rows ) + { + std::stringstream s; + s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } + rowCapacities[ std::get< 0 >( i ) ]++; + } + SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); + hostMatrix.setCompressedRowLengths( rowCapacities ); + for( const auto& i : data ) + { + if( std::get< 1 >( i ) >= columns ) + { + std::stringstream s; + s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } + hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); + } + ( *this ) = hostMatrix; +} + template< typename Real, typename Device, typename Index, -- GitLab From 036d57b0de44f95b44acf3876b80e5d40367c247 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 26 Feb 2020 13:51:03 +0100 Subject: [PATCH 150/179] Fixed SparseMatrix::addElement unit test. 
--- src/UnitTests/Matrices/SparseMatrixTest.h | 42 +++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 45dc40578..30d3a692d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -663,9 +663,9 @@ void test_AddElement() * / 1 2 3 0 0 \ * | 0 4 5 6 0 | * | 0 0 7 8 9 | - * | 10 0 0 0 0 | - * | 0 11 0 0 0 | - * \ 0 0 0 12 0 / + * | 10 1 1 0 0 | + * | 0 11 1 1 0 | + * \ 0 0 1 12 1 / */ const IndexType rows = 6; @@ -675,9 +675,9 @@ void test_AddElement() { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, { 1, 1, 4 }, { 1, 2, 5 }, { 1, 3, 6 }, { 2, 2, 7 }, { 2, 3, 8 }, { 2, 4, 9 }, - { 3, 0, 10 }, { 3, 1, 0 }, { 3, 2, 0 }, - { 4, 1, 11 }, { 4, 2, 0 }, { 4, 3, 0 }, - { 5, 2, 0 }, { 5, 3, 12 }, { 5, 4, 0 } } ); + { 3, 0, 10 }, { 3, 1, 1 }, { 3, 2, 1 }, + { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3, 1 }, + { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } ); /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); m.setCompressedRowLengths( rowLengths ); @@ -718,22 +718,22 @@ void test_AddElement() EXPECT_EQ( m.getElement( 2, 4 ), 9 ); EXPECT_EQ( m.getElement( 3, 0 ), 10 ); - EXPECT_EQ( m.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 11 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 1 ); EXPECT_EQ( m.getElement( 5, 3 ), 12 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( 
m.getElement( 5, 4 ), 1 ); // Add new elements to the old elements with a multiplying factor applied to the old elements. /* @@ -742,9 +742,9 @@ void test_AddElement() * / 3 6 9 0 0 \ * | 0 12 15 18 0 | * | 0 0 21 24 27 | - * | 30 11 12 0 0 | - * | 0 35 14 15 0 | - * \ 0 0 16 41 18 / + * | 30 13 14 0 0 | + * | 0 35 16 17 0 | + * \ 0 0 18 41 20 / */ RealType newValue = 1; @@ -786,22 +786,22 @@ void test_AddElement() EXPECT_EQ( m.getElement( 2, 4 ), 27 ); EXPECT_EQ( m.getElement( 3, 0 ), 30 ); - EXPECT_EQ( m.getElement( 3, 1 ), 11 ); - EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 1 ), 13 ); + EXPECT_EQ( m.getElement( 3, 2 ), 14 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 35 ); - EXPECT_EQ( m.getElement( 4, 2 ), 14 ); - EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 16 ); + EXPECT_EQ( m.getElement( 5, 2 ), 18 ); EXPECT_EQ( m.getElement( 5, 3 ), 41 ); - EXPECT_EQ( m.getElement( 5, 4 ), 18 ); + EXPECT_EQ( m.getElement( 5, 4 ), 20 ); } template< typename Matrix > -- GitLab From cda43f31c15455833c81aa35289472f93ea50655 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 26 Feb 2020 17:08:02 +0100 Subject: [PATCH 151/179] Added atomic operations. 
--- src/TNL/Algorithms/AtomicOperations.h | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/TNL/Algorithms/AtomicOperations.h diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h new file mode 100644 index 000000000..b00260846 --- /dev/null +++ b/src/TNL/Algorithms/AtomicOperations.h @@ -0,0 +1,70 @@ +/*************************************************************************** + AtomicOperations.h - description + ------------------- + begin : Feb 26, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by: Tomas Oberhuber, Jakub Klinkovsky + +#pragma once + +#include +#include +#include + +namespace TNL { +namespace Algorithms { + +template< typename Device > +struct AtomicOperations{}; + +template<> +struct AtomicOperations< Devices::Host > +{ + template< typename Value > + static void add( Value& v, const Value& a ) + { +#pragma omp atomic update + v += a; + } +}; + +template<> +struct AtomicOperations< Devices::Cuda > +{ + template< typename Value > + __cuda_callable__ + static void add( Value& v, const Value& a ) + { +#ifdef HAVE_CUDA +#if __CUDA_ARCH__ < 600 + if( std::is_same< Value, double >::value ) + { + unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; + unsigned long long int old = *v_as_ull, assumed; + + do + { + assumed = old; + old = atomicCAS( v_as_ull, + assumed, + __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ; + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } + while( assumed != old ); + return; + } +#endif + atomicAdd( &v, a ); +#endif + } + +}; + +} //namespace Algorithms +} //namespace TNL \ No newline at end of file -- GitLab From a72c076d3455730dcbe6c1950e739458d5a1d57a Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber 
Date: Wed, 26 Feb 2020 17:08:30 +0100 Subject: [PATCH 152/179] Fixed symmetric sparse matrix with unit tests. --- src/TNL/Matrices/SparseMatrixView.hpp | 36 +++++++++---- .../Matrices/SymmetricSparseMatrixTest.hpp | 53 +++++++++---------- 2 files changed, 51 insertions(+), 38 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 4e5244806..e07e00fa6 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace TNL { namespace Matrices { @@ -382,16 +382,24 @@ vectorProduct( const InVector& inVector, const IndexType paddingIndex = this->getPaddingIndex(); if( isSymmetric() ) outVector *= outVectorMultiplicator; - auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> RealType { const IndexType column = columnIndexesView[ globalIdx ]; compute = ( column != paddingIndex ); if( ! 
compute ) return 0.0; - if( isSymmetric() ) + if( isSymmetric() && column < row ) { - TNL_ASSERT_TRUE( false, "" ); - //Atomic< RealType, DeviceType > atomic; - //if( isBinary() ) + if( isBinary() ) + Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] ); + else + { + //std::cerr << outVectorView << std::endl; + Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] ); + //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ]; + + //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] << + // " --> " << outVectorView[ column ] << std::endl; + } } if( isBinary() ) return inVectorView[ column ]; @@ -401,10 +409,20 @@ vectorProduct( const InVector& inVector, sum += value; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - if( outVectorMultiplicator == 0.0 ) - outVectorView[ row ] = matrixMultiplicator * value; + if( isSymmetric() ) + { + //std::cerr << outVectorView << std::endl; + //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ]; + outVectorView[ row ] += matrixMultiplicator * value; + //std::cerr << " ---> " << outVectorView[ row ] << std::endl; + } else - outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; + { + if( outVectorMultiplicator == 0.0 ) + outVectorView[ row ] = matrixMultiplicator * value; + else + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; + } }; this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index 193c1e031..58a4f4fce 
100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -734,10 +735,10 @@ void test_VectorProduct() const IndexType m_cols_2 = 4; Matrix m_2( m_rows_2, m_cols_2, { - { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, - { 1, 0, 2 }, { 1, 2, 6 }, { 1, 3, 8 }, + { 0, 0, 1 }, + { 1, 0, 2 }, { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 }, - { 3, 2, 8 }, { 3, 3, 9 } } ); + { 3, 1, 8 }, { 3, 3, 9 } } ); VectorType inVector_2( m_cols_2, 2 ); VectorType outVector_2( m_rows_2, 0 ); @@ -835,9 +836,9 @@ void test_VectorProduct() Matrix m_5( m_rows_5, m_cols_5,{ { 0, 0, 1 }, { 1, 1, 2, }, - { 2, 2, 3 }, { 2, 3, 4 }, { 2, 4, 6 }, { 2, 5, 9 }, - { 3, 2, 4 }, { 3, 3, 5 }, { 3, 4, 7 }, { 3, 5, 10 }, - { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 4, 5, 11 }, + { 2, 2, 3 }, + { 3, 2, 4 }, { 3, 3, 5 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, { 6, 6, 13 }, { 7, 7, 14 } @@ -883,9 +884,9 @@ void test_RowsReduction() Matrix m_5( m_rows_5, m_cols_5,{ { 0, 0, 1 }, { 1, 1, 2, }, - { 2, 2, 3 }, { 2, 3, 4 }, { 2, 4, 6 }, { 2, 5, 9 }, - { 3, 2, 4 }, { 3, 3, 5 }, { 3, 4, 7 }, { 3, 5, 10 }, - { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 4, 5, 11 }, + { 2, 2, 3 }, + { 3, 2, 4 }, { 3, 3, 5 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, { 6, 6, 13 }, { 7, 7, 14 } @@ -896,24 +897,28 @@ void test_RowsReduction() typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 ); typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } ); auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + rowLengths_view = 0; + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> 
IndexType { + if( value != 0.0 && row != column) + TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 ); return ( value != 0.0 ); }; auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { aux += a; }; auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; + rowLengths_view[ rowIdx ] += value; }; m_5.allRowsReduction( fetch, reduce, keep, 0 ); EXPECT_EQ( rowLengths_true, rowLengths ); m_5.getCompressedRowLengths( rowLengths ); - EXPECT_EQ( rowLengths_true, rowLengths ); + typename Matrix::RowsCapacitiesType rowLengths_symmetric( { 1, 1, 1, 2, 3, 4, 1, 1 } ); + EXPECT_EQ( rowLengths_symmetric, rowLengths ); //// // Compute max norm - TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() ); + /*TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() ); auto rowSums_view = rowSums.getView(); auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { return abs( value ); @@ -926,7 +931,7 @@ void test_RowsReduction() }; m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); const RealType maxNorm = TNL::max( rowSums ); - EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36*/ } template< typename Matrix > @@ -1018,7 +1023,7 @@ void test_SaveAndLoad( const char* filename ) { 2, 1, 4 }, { 2, 2, 5 }, { 3, 2, 6 }, { 3, 3, 7 }, { 4, 3, 8 }, { 4, 4, 9 }, - { 5, 5, 10 } } ); + { 5, 4, 10 } } ); // Check the set elements EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); @@ -1060,9 +1065,6 @@ void test_SaveAndLoad( const char* filename ) ASSERT_NO_THROW( savedMatrix.save( filename ) ); Matrix loadedMatrix; - //typename Matrix::CompressedRowLengthsVector rowLengths2( m_rows ); - //rowLengths2 = 3; - //loadedMatrix.setCompressedRowLengths( rowLengths2 ); 
ASSERT_NO_THROW( loadedMatrix.load( filename ) ); @@ -1071,42 +1073,36 @@ void test_SaveAndLoad( const char* filename ) EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 5 ), loadedMatrix.getElement( 0, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 5 ), loadedMatrix.getElement( 1, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 5 ), loadedMatrix.getElement( 2, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 5 ), loadedMatrix.getElement( 3, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) ); 
EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 4, 5 ), loadedMatrix.getElement( 4, 5 ) ); EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) ); EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) ); - EXPECT_EQ( savedMatrix.getElement( 5, 5 ), loadedMatrix.getElement( 5, 5 ) ); EXPECT_EQ( std::remove( filename ), 0 ); } @@ -1130,11 +1126,10 @@ void test_Print() const IndexType m_cols = 4; Matrix m( m_rows, m_cols, { - { 0, 0, 4 }, { 0, 1, 1 }, - { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 }, - { 2, 1, 1 }, { 2, 2, 4 }, { 2, 3, 1 }, - { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 }, - { 4, 3, 1 }, { 4, 4, 4 } + { 0, 0, 4 }, + { 1, 0, 1 }, { 1, 1, 4 }, + { 2, 1, 1 }, { 2, 2, 4 }, + { 3, 2, 1 }, { 3, 3, 4 } } ); std::stringstream printed; -- GitLab From a4885bf91357a0559d4661b079241b1499572667 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Wed, 26 Feb 2020 22:19:10 +0100 Subject: [PATCH 153/179] Fixed symmetric sparse matrix to run with CUDA. 
--- src/TNL/Algorithms/AtomicOperations.h | 59 +++++++++++++------ src/TNL/Matrices/SparseMatrix.h | 5 ++ src/TNL/Matrices/SparseMatrixView.hpp | 12 ---- .../Matrices/SymmetricSparseMatrixTest_CSR.h | 20 +++---- 4 files changed, 56 insertions(+), 40 deletions(-) diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h index b00260846..4be725d48 100644 --- a/src/TNL/Algorithms/AtomicOperations.h +++ b/src/TNL/Algorithms/AtomicOperations.h @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include @@ -41,30 +42,52 @@ struct AtomicOperations< Devices::Cuda > static void add( Value& v, const Value& a ) { #ifdef HAVE_CUDA + atomicAdd( &v, a ); +#endif // HAVE_CUDA + } + +#ifdef HAVE_CUDA + __device__ + static void add( double& v, const double& a ) + { #if __CUDA_ARCH__ < 600 - if( std::is_same< Value, double >::value ) - { - unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; - unsigned long long int old = *v_as_ull, assumed; + unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; + unsigned long long int old = *v_as_ull, assumed; - do - { - assumed = old; - old = atomicCAS( v_as_ull, - assumed, - __double_as_longlong( s + __longlong_as_double( assumed ) ) ) ; + do + { + assumed = old; + old = atomicCAS( v_as_ull, + assumed, + __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ; - // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) - } - while( assumed != old ); - return; + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) } -#endif + while( assumed != old ); +#else // __CUDA_ARCH__ < 600 atomicAdd( &v, a ); -#endif +#endif //__CUDA_ARCH__ < 600 + +#else // HAVE_CUDA + static void add( double& v, const double& a ){} +#endif // HAVE_CUDA } + __cuda_callable__ + static void add( long int& v, const long int& a ) + { +#ifdef HAVE_CUDA + TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." 
); +#endif // HAVE_CUDA + } + + __cuda_callable__ + static void add( short int& v, const short int& a ) + { +#ifdef HAVE_CUDA + TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." ); +#endif // HAVE_CUDA + } }; - } //namespace Algorithms -} //namespace TNL \ No newline at end of file +} //namespace TNL diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index c7f953a8c..d48e7d6ea 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -34,6 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; static constexpr bool isBinary() { return MatrixType::isBinary(); }; + static_assert( ! isSymmetric() || + ! std::is_same< Device, Devices::Cuda >::value || + ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), + "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." 
) ); + using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index e07e00fa6..98285e064 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -392,14 +392,7 @@ vectorProduct( const InVector& inVector, if( isBinary() ) Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] ); else - { - //std::cerr << outVectorView << std::endl; Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] ); - //outVectorView[ column ] += matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ]; - - //std::cerr << "Symmetric add to out vector row " << column << " value " << valuesView[ globalIdx ] << " * " << inVectorView[ row ] << - // " --> " << outVectorView[ column ] << std::endl; - } } if( isBinary() ) return inVectorView[ column ]; @@ -410,12 +403,7 @@ vectorProduct( const InVector& inVector, }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { if( isSymmetric() ) - { - //std::cerr << outVectorView << std::endl; - //std::cerr << "Adding " << matrixMultiplicator * value << " to result vector " << outVectorView[ row ]; outVectorView[ row ] += matrixMultiplicator * value; - //std::cerr << " ---> " << outVectorView[ row ] << std::endl; - } else { if( outVectorMultiplicator == 0.0 ) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h index 45dd5e5b9..f6f7ec95a 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h @@ -36,19 +36,19 @@ using MatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< long, 
TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > -#ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, +#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU. + ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, 
TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > + //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > #endif // HAVE_CUDA >; -- GitLab From 57aa3118aa5c915213d4665c93a37e83e33415f2 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 10:02:32 +0100 Subject: [PATCH 154/179] Added --with-ci-flags option to build scripts. 
--- .gitlab-ci.yml | 3 ++- CMakeLists.txt | 5 +++++ build | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 624a19729..fa4c3725d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -46,7 +46,7 @@ stages: - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 # enforce (more or less) warning-free builds - - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" + #- export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. @@ -64,6 +64,7 @@ stages: -DWITH_EXAMPLES=${WITH_EXAMPLES} -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} + -DWITH_CI_FLAGS=yes # "install" implies the "all" target - ninja ${NINJAFLAGS} install - if [[ ${WITH_TESTS} == "yes" ]]; then diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d1666163..a312b00cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,11 @@ set( CMAKE_SHARED_LINKER_FLAGS "" ) set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-rdynamic" ) set( CMAKE_SHARED_LINKER_FLAGS_RELEASE "" ) +if( ${WITH_CI_FLAGS} ) + # enforce (more or less) warning-free builds + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" ) +endif() + # set additional Debug/Release options using generator expressions # (that way we can exclude some options for specific targets, see https://stackoverflow.com/a/59734798 for details) add_compile_options( diff --git a/build b/build index 9deb12d10..5a26cbb61 100755 --- a/build +++ b/build @@ -31,6 +31,7 @@ WITH_EXAMPLES="yes" WITH_PYTHON="yes" WITH_TOOLS="yes" WITH_BENCHMARKS="yes" +WITH_CI_FLAGS="no" for option in "$@" do @@ -63,6 +64,7 @@ do --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; --with-python=* ) 
WITH_PYTHON="${option#*=}" ;; --with-cxx-flags=* ) WITH_CXX_FLAGS="${option#*=}" ;; + --with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." exit 1 ;; @@ -93,6 +95,7 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." echo " --with-cxx-flags=FLAGS Additional flags for C++ compiler." + echo " --with-cxx-flags=yes/no Turns on more strict C++ flags for CI. 'no' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." @@ -145,6 +148,7 @@ cmake_command=( -DWITH_PYTHON=${WITH_PYTHON} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS} + -DWITH_CI_FLAGS=${WITH_CI_FLAGS} -DDCMTK_DIR=${DCMTK_DIR} ) -- GitLab From 135698134ecafff7846e342bf75202c434e54595 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 10:56:12 +0100 Subject: [PATCH 155/179] Deleted unused variable in SparseMatrixView. --- src/TNL/Matrices/SparseMatrixView.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index 98285e064..e0f1e5e0a 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -209,7 +209,6 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: getRow( const IndexType& rowIdx ) -> RowView { TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); - typename RowView::SegmentViewType t = this->segments.getSegmentView( rowIdx ); return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); } -- GitLab From ab0c1c08db015d2334ec794db3a41c84d3fc9cae Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 10:59:25 +0100 Subject: [PATCH 156/179] Added TNL::is_same which works in CUDA device code. --- src/TNL/Assert.h | 21 ++++++++++++++++++++- src/TNL/Containers/Array.hpp | 8 ++++---- src/TNL/Containers/ArrayView.hpp | 8 ++++---- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index dc4ba7cf4..1d3aa88fe 100644 --- a/src/TNL/Assert.h +++ b/src/TNL/Assert.h @@ -124,6 +124,25 @@ #include namespace TNL { + + // This is alternative implementation of is_same because std::is_same + // does not work in CUDA device code ("std::integral_constant ::value"). + // This can be removed when std::_is_same works well. + // + template< typename T1, typename T2 > + struct is_same + { + __cuda_callable__ + static constexpr bool value() { return false; } + }; + + template< typename T1 > + struct is_same< T1, T1 > + { + __cuda_callable__ + static constexpr bool value() { return true; } + }; + /** * \brief Internal namespace for helper classes used in the TNL_ASSERT_* macros. 
*/ @@ -394,7 +413,7 @@ TNL_IMPL_CMP_HELPER_( GT, > ); pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \ msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \ #val1, #val2, val1, val2 ) - + // Main definitions of the TNL_ASSERT_* macros // unary #define TNL_ASSERT_TRUE( val, msg ) \ diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 4dd8d5a2f..40b7d1b45 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -522,9 +522,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); @@ -542,9 +542,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), "Attempt to access data not allocated on the host from the host." 
); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 81e143ac2..838ebc32b 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -253,9 +253,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); @@ -272,9 +272,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), + TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." 
); -- GitLab From 26ed46e975764f38b86800e1329638dc53bc577e Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 11:00:13 +0100 Subject: [PATCH 157/179] Deleting CI C++ flags which were moved to the root CMakeLists. --- .gitlab-ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fa4c3725d..de46d4c82 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,8 +45,6 @@ stages: fi - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 - # enforce (more or less) warning-free builds - #- export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. -- GitLab From 02354656efe4f6162a65375ec0e7181f2978e818 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 11:46:29 +0100 Subject: [PATCH 158/179] Fixed include of cuda.h in AtomicOperations. --- src/TNL/Algorithms/AtomicOperations.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h index 4be725d48..679b14eb5 100644 --- a/src/TNL/Algorithms/AtomicOperations.h +++ b/src/TNL/Algorithms/AtomicOperations.h @@ -12,7 +12,9 @@ #pragma once +#ifdef HAVE_CUDA #include +#endif #include #include #include @@ -80,7 +82,7 @@ struct AtomicOperations< Devices::Cuda > TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." ); #endif // HAVE_CUDA } - + __cuda_callable__ static void add( short int& v, const short int& a ) { -- GitLab From fe5aca83de4ff47a768896a6dac35cea0ff48eb7 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Thu, 27 Feb 2020 12:23:18 +0100 Subject: [PATCH 159/179] Added MatrixInfo. 
--- src/TNL/Matrices/MatrixInfo.h | 76 +++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 src/TNL/Matrices/MatrixInfo.h diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h new file mode 100644 index 000000000..34f85dd82 --- /dev/null +++ b/src/TNL/Matrices/MatrixInfo.h @@ -0,0 +1,76 @@ +/*************************************************************************** + Matrix.h - description + ------------------- + begin : Dec 18, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace TNL { +/** + * \brief Namespace for matrix formats. + */ +namespace Matrices { + +template< typename Matrix > +struct MatrixInfo +{}; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +struct MatrixInfo< DenseView< Real, Device, RowMajorOrder > > +{ + static String getDensity() { return String( "dense" ); }; +}; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +struct MatrixInfo< Dense< Real, Device, RowMajorOrder, RealAllocator > > +: public MatrixInfo< typename Dense< Real, Device, RowMajorOrder, RealAllocator >::ViewType > +{ +}; + + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename Device_, typename Index_ > class SegmentsView > +struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { + if( std::is_same< SegementsView ........ 
>) + }; +}; + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments, + typename RealAllocator, + typename IndexAllocator > +struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > > +:public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType > +{ +} + +} //namespace Matrices +} //namespace TNL \ No newline at end of file -- GitLab From 8f57fd182a486412b5acafd06a235245ac30c045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 27 Feb 2020 14:02:56 +0100 Subject: [PATCH 160/179] Fixed syntax error in AtomicOperations.h --- src/TNL/Algorithms/AtomicOperations.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h index 679b14eb5..6b5c5b4e0 100644 --- a/src/TNL/Algorithms/AtomicOperations.h +++ b/src/TNL/Algorithms/AtomicOperations.h @@ -69,11 +69,10 @@ struct AtomicOperations< Devices::Cuda > #else // __CUDA_ARCH__ < 600 atomicAdd( &v, a ); #endif //__CUDA_ARCH__ < 600 - + } #else // HAVE_CUDA static void add( double& v, const double& a ){} #endif // HAVE_CUDA - } __cuda_callable__ static void add( long int& v, const long int& a ) -- GitLab From b06136b9dfd99b592cfd88cdff659b966c54ee7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 27 Feb 2020 16:32:37 +0100 Subject: [PATCH 161/179] Added use of MatrixInfo to SpMV benchmark. 
--- src/Benchmarks/SpMV/spmv.h | 6 +- src/TNL/Containers/Segments/CSR.h | 2 + src/TNL/Containers/Segments/CSR.hpp | 10 +++ src/TNL/Containers/Segments/CSRView.h | 2 + src/TNL/Containers/Segments/CSRView.hpp | 9 +++ src/TNL/Containers/Segments/Ellpack.h | 3 +- src/TNL/Containers/Segments/Ellpack.hpp | 12 ++++ src/TNL/Containers/Segments/EllpackView.h | 2 + src/TNL/Containers/Segments/EllpackView.hpp | 11 +++ src/TNL/Containers/Segments/SlicedEllpack.h | 2 + src/TNL/Containers/Segments/SlicedEllpack.hpp | 12 ++++ .../Containers/Segments/SlicedEllpackView.h | 2 + .../Containers/Segments/SlicedEllpackView.hpp | 11 +++ src/TNL/Matrices/MatrixInfo.h | 69 ++++++++++++++++--- 14 files changed, 139 insertions(+), 14 deletions(-) diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index e3a1ae047..02a26854d 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -160,7 +161,7 @@ benchmarkSpMV( Benchmark& benchmark, { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, - { "matrix format", convertToString( getType( hostMatrix ) ) } + { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) } } )); hostVector.setSize( hostMatrix.getColumns() ); @@ -294,7 +295,8 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); - //benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); //// // Segments based sparse matrices diff --git 
a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index 3645e9f6a..89cad0c6a 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -47,6 +47,8 @@ class CSR static String getSerializationType(); + static String getSegmentsType(); + /** * \brief Set sizes of particular segments. */ diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index 55dcba74c..9a948b04e 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -64,6 +64,16 @@ getSerializationType() return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index, + typename IndexAllocator > +String +CSR< Device, Index, IndexAllocator >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + template< typename Device, typename Index, typename IndexAllocator > diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index 759fe8ff7..f7cf815d0 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -52,6 +52,8 @@ class CSRView static String getSerializationType(); + static String getSegmentsType(); + __cuda_callable__ ViewType getView(); diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 043e06e04..fab5c6da7 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -64,6 +64,15 @@ getSerializationType() return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index > +String +CSRView< Device, Index >:: +getSegmentsType() +{ + return "CSR"; +} + template< typename Device, typename Index > __cuda_callable__ diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 6edacb1cf..a1188a854 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ 
b/src/TNL/Containers/Segments/Ellpack.h @@ -39,7 +39,6 @@ class Ellpack //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; - Ellpack(); Ellpack( const SegmentsSizes& sizes ); @@ -52,6 +51,8 @@ class Ellpack static String getSerializationType(); + static String getSegmentsType(); + ViewType getView(); //ConstViewType getConstView() const; diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 663a65bc8..9c59c5529 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -88,6 +88,18 @@ getSerializationType() return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +String +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index dcbc56d1b..10a89bd7b 100644 --- a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -54,6 +54,8 @@ class EllpackView static String getSerializationType(); + static String getSegmentsType(); + __cuda_callable__ ViewType getView(); diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index ea2dc0d21..84086f380 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -74,6 +74,17 @@ getSerializationType() return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +String +EllpackView< Device, Index, 
RowMajorOrder, Alignment >:: +getSegmentsType() +{ + return "Ellpack"; +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index e1cdfa1d4..2027f1d78 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -49,6 +49,8 @@ class SlicedEllpack static String getSerializationType(); + static String getSegmentsType(); + ViewType getView(); ConstViewType getConstView() const; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index 3d3a6d8c3..9ba1276e3 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -81,6 +81,18 @@ getSerializationType() return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + template< typename Device, typename Index, typename IndexAllocator, diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 23001553c..6e2e55bbc 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -56,6 +56,8 @@ class SlicedEllpackView static String getSerializationType(); + static String getSegmentsType(); + __cuda_callable__ ViewType getView(); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 3e3c8c09c..f9e252fd9 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -85,6 +85,17 @@ getSerializationType() return "SlicedEllpack< 
[any_device], " + TNL::getSerializationType< IndexType >() + " >"; } +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentsType() +{ + return "SlicedEllpack"; +} + template< typename Device, typename Index, bool RowMajorOrder, diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h index 34f85dd82..75cac4055 100644 --- a/src/TNL/Matrices/MatrixInfo.h +++ b/src/TNL/Matrices/MatrixInfo.h @@ -12,9 +12,17 @@ #include #include -#include +#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include namespace TNL { /** @@ -30,7 +38,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -struct MatrixInfo< DenseView< Real, Device, RowMajorOrder > > +struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > > { static String getDensity() { return String( "dense" ); }; }; @@ -40,12 +48,11 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -struct MatrixInfo< Dense< Real, Device, RowMajorOrder, RealAllocator > > -: public MatrixInfo< typename Dense< Real, Device, RowMajorOrder, RealAllocator >::ViewType > +struct MatrixInfo< Dense< Real, Device, Index, RowMajorOrder, RealAllocator > > +: public MatrixInfo< typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::ViewType > { }; - template< typename Real, typename Device, typename Index, @@ -55,9 +62,7 @@ struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsVi { static String getDensity() { return String( "sparse" ); }; - static String getFormat() { - if( std::is_same< SegementsView ........ 
>) - }; + static String getFormat() { return SegmentsView< Device, Index >::getSegmentsType(); }; }; template< typename Real, @@ -68,9 +73,51 @@ template< typename Real, typename RealAllocator, typename IndexAllocator > struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > > -:public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType > +: public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType > +{ +}; + +///// +// Legacy matrices +template< typename Real, typename Device, typename Index > +struct MatrixInfo< BiEllpack< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "BiEllpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< CSR< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "CSR Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< ChunkedEllpack< Real, Device, Index > > { -} + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "ChunkedEllpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< Ellpack< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "Ellpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index, int SliceSize > +struct MatrixInfo< SlicedEllpack< Real, Device, Index, SliceSize> > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "SlicedEllpack Legacy"; }; +}; } //namespace Matrices -} //namespace TNL \ No newline at end of file +} //namespace TNL -- GitLab From 
3cde4e81047d78453c5fa6463d43d7241e9eba42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Thu, 27 Feb 2020 20:46:57 +0100 Subject: [PATCH 162/179] Fixed MatrixReader after rebase. --- src/TNL/Matrices/MatrixReader_impl.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index a80d00283..df2c05c63 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -60,12 +60,6 @@ void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, matrix.setDimensions( rows, columns ); rowLengths.setSize( rows ); - if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) ) - { - std::cerr << "Unable to compute compressed row lengths." << std::endl; - return false; - } - computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ); matrix.setCompressedRowLengths( rowLengths ); -- GitLab From b69b69ab624a5bd0014d03b44f513541c60ec0d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 07:50:07 +0100 Subject: [PATCH 163/179] Removed --with-cxx-flags from the build script If necessary, custom flags can be specified by simply exporting the CXXFLAGS environment variable in the shell. 
--- CMakeLists.txt | 2 +- build | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a312b00cb..ea0d8a30b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options -set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WITH_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) +set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode diff --git a/build b/build index 5a26cbb61..ee74fa87b 100755 --- a/build +++ b/build @@ -63,7 +63,6 @@ do --with-tools=* ) WITH_TOOLS="${option#*=}" ;; --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; --with-python=* ) WITH_PYTHON="${option#*=}" ;; - --with-cxx-flags=* ) WITH_CXX_FLAGS="${option#*=}" ;; --with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." @@ -94,8 +93,6 @@ if [[ ${HELP} == "yes" ]]; then echo " --with-tools=yes/no Compile the 'src/Tools' directory. 'yes' by default." echo " --with-python=yes/no Compile the Python bindings. 'yes' by default." echo " --with-benchmarks=yes/no Compile the 'src/Benchmarks' directory. 'yes' by default." - echo " --with-cxx-flags=FLAGS Additional flags for C++ compiler." - echo " --with-cxx-flags=yes/no Turns on more strict C++ flags for CI. 'no' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." echo " --verbose It enables verbose build." echo " --root-dir=PATH Path to the TNL source code root dir." 
@@ -147,7 +144,6 @@ cmake_command=( -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} - -DWITH_CXX_FLAGS=${WITH_CXX_FLAGS} -DWITH_CI_FLAGS=${WITH_CI_FLAGS} -DDCMTK_DIR=${DCMTK_DIR} ) -- GitLab From 603b5edf83f3bb36b35d23d77e29b84b540243a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 08:05:26 +0100 Subject: [PATCH 164/179] Removed useless include of SlicedEllpack from ODESolvers benchmark --- src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h index 1e4bc380e..dad2cdd8d 100644 --- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h @@ -36,8 +36,6 @@ #include "Euler.h" #include "Merson.h" -#include - using namespace TNL; using namespace TNL::Benchmarks; using namespace TNL::Pointers; -- GitLab From 2d93d5680f8ca256862a038277c74763afb81d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 08:46:15 +0100 Subject: [PATCH 165/179] Removed TNL::is_same and tweaked asserts in Array and ArrayView --- src/TNL/Assert.h | 19 ------------------- src/TNL/Containers/Array.h | 4 ++-- src/TNL/Containers/Array.hpp | 12 +++++------- src/TNL/Containers/ArrayView.h | 4 ++-- src/TNL/Containers/ArrayView.hpp | 10 ++++------ 5 files changed, 13 insertions(+), 36 deletions(-) diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index 1d3aa88fe..630abd09f 100644 --- a/src/TNL/Assert.h +++ b/src/TNL/Assert.h @@ -124,25 +124,6 @@ #include namespace TNL { - - // This is alternative implementation of is_same because std::is_same - // does not work in CUDA device code ("std::integral_constant ::value"). - // This can be removed when std::_is_same works well. 
- // - template< typename T1, typename T2 > - struct is_same - { - __cuda_callable__ - static constexpr bool value() { return false; } - }; - - template< typename T1 > - struct is_same< T1, T1 > - { - __cuda_callable__ - static constexpr bool value() { return true; } - }; - /** * \brief Internal namespace for helper classes used in the TNL_ASSERT_* macros. */ diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index bf69f4888..116624511 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -457,7 +457,7 @@ class Array * host, and if the array was allocated in the device memory, it can be * called only from device kernels. If NDEBUG is not defined, assertions * inside this methods performs runtime checks for cross-device memory - * accesses which lead to segmentation fault. If you need to do just a + * accesses which lead to segmentation fault. If you need to do just a * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. @@ -474,7 +474,7 @@ class Array * host, and if the array was allocated in the device memory, it can be * called only from device kernels. If NDEBUG is not defined, assertions * inside this methods performs runtime checks for cross-device memory - * accesses which lead to segmentation fault. If you need to do just a + * accesses which lead to segmentation fault. If you need to do just a * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. 
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 40b7d1b45..ab81db7aa 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -71,7 +71,7 @@ Array( const IndexType& size, const Value& value, const AllocatorType& allocator : allocator( allocator ) { this->setSize( size ); - ( *this ) = value; + *this = value; } template< typename Value, @@ -522,10 +522,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), - "Attempt to access data not allocated on the host from the host." ); + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); @@ -542,10 +541,9 @@ Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), - "Attempt to access data not allocated on the host from the host." 
); + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index af54aef8a..c29e00741 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -349,7 +349,7 @@ public: * host, and if the data was allocated in the device memory, it can be * called only from device kernels. If NDEBUG is not defined, assertions * inside this methods performs runtime checks for cross-device memory - * accesses which lead to segmentation fault. If you need to do just a + * accesses which lead to segmentation fault. If you need to do just a * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. @@ -367,7 +367,7 @@ public: * host, and if the data was allocated in the device memory, it can be * called only from device kernels. If NDEBUG is not defined, assertions * inside this methods performs runtime checks for cross-device memory - * accesses which lead to segmentation fault. If you need to do just a + * accesses which lead to segmentation fault. If you need to do just a * pointer arithmetics use \e getData instead. * * \param i The index of the element to be accessed. diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 838ebc32b..0562b81db 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -253,10 +253,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." 
); + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), - "Attempt to access data not allocated on the host from the host." ); + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); @@ -272,10 +271,9 @@ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { #ifdef __CUDA_ARCH__ - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Cuda >::value() ), "Attempt to access data not allocated on CUDA device from CUDA device." ); + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else - TNL_ASSERT_TRUE( ( TNL::is_same< Device, Devices::Host >::value() || TNL::is_same< Device, Devices::Sequential >::value() ), - "Attempt to access data not allocated on the host from the host." ); + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); -- GitLab From 79c1e84193c78387d100632a023e0e381167ebff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 08:50:45 +0100 Subject: [PATCH 166/179] Removed method ArrayView::copy Shallow copy is equivalent to binding, for which there is ArrayView::bind. 
--- src/TNL/Containers/ArrayView.h | 9 --------- src/TNL/Containers/ArrayView.hpp | 13 ------------- src/TNL/Containers/Segments/CSRView.hpp | 2 +- src/TNL/Containers/Segments/SlicedEllpackView.hpp | 4 ++-- src/TNL/Matrices/MatrixView.hpp | 2 +- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 4 ++-- src/TNL/Matrices/SparseMatrixView.hpp | 2 +- 7 files changed, 7 insertions(+), 29 deletions(-) diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index c29e00741..5b9766ffd 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -237,15 +237,6 @@ public: typename = std::enable_if_t< std::is_convertible< T, ValueType >::value || IsArrayType< T >::value > > ArrayView& operator=( const T& array ); - /** - * \brief Makes shallow copy of the array view. - * - * \param view Reference to the source array view. - * \return Reference to this array view. - */ - __cuda_callable__ - ArrayView& copy( const ArrayView& view ); - /** * \brief Swaps this array view with another. 
* diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index 0562b81db..e36182cd5 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -118,19 +118,6 @@ operator=( const T& data ) return *this; } -template< typename Value, - typename Device, - typename Index > -__cuda_callable__ -ArrayView< Value, Device, Index >& -ArrayView< Value, Device, Index >:: -copy( const ArrayView& view ) -{ - data = view.data; - size = view.size; - return *this; -} - template< typename Value, typename Device, typename Index > diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index fab5c6da7..02be7f099 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -237,7 +237,7 @@ CSRView< Device, Index >& CSRView< Device, Index >:: operator=( const CSRView& view ) { - this->offsets.copy( view.offsets ); + this->offsets.bind( view.offsets ); return *this; } diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index f9e252fd9..c4e03aada 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -368,8 +368,8 @@ operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& v this->size = view.size; this->alignedSize = view.alignedSize; this->segmentsCount = view.segmentsCount; - this->sliceOffsets.copy( view.sliceOffsets ); - this->sliceSegmentSizes.copy( view.sliceSegmentSizes ); + this->sliceOffsets.bind( view.sliceOffsets ); + this->sliceSegmentSizes.bind( view.sliceSegmentSizes ); return *this; } diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 363fec208..9fd73e519 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -138,7 +138,7 @@ operator=( const MatrixView& view ) { rows = view.rows; columns = view.columns; - values.copy( 
view.values ); + values.bind( view.values ); return *this; } diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 2bd5392df..ecfe1c1d8 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -335,8 +335,8 @@ MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: operator=( const MultidiagonalMatrixView& view ) { MatrixView< Real, Device, Index >::operator=( view ); - this->diagonalsShifts.copy( view.diagonalsShifts ); - this->hostDiagonalsShifts.copy( view.hostDiagonalsShifts ); + this->diagonalsShifts.bind( view.diagonalsShifts ); + this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts ); this->indexer = view.indexer; return *this; } diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp index e0f1e5e0a..2bae61f98 100644 --- a/src/TNL/Matrices/SparseMatrixView.hpp +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -606,7 +606,7 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix ) { MatrixView< Real, Device, Index >::operator=( matrix ); - this->columnIndexes.copy( matrix.columnIndexes ); + this->columnIndexes.bind( matrix.columnIndexes ); this->segments = matrix.segments; return *this; } -- GitLab From ae8ee53ea2af54ecbb9f79f666da778a70c2dc68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 09:06:36 +0100 Subject: [PATCH 167/179] Fixed expression in static_assert in SparseMatrix.h --- src/TNL/Matrices/SparseMatrix.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index d48e7d6ea..032767518 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -34,10 +34,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > static 
constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; static constexpr bool isBinary() { return MatrixType::isBinary(); }; - static_assert( ! isSymmetric() || - ! std::is_same< Device, Devices::Cuda >::value || - ( ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), - "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ) ); + static_assert( + ! isSymmetric() || + ! std::is_same< Device, Devices::Cuda >::value || + ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), + "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." ); using RealType = Real; template< typename Device_, typename Index_, typename IndexAllocator_ > -- GitLab From c22a969b8d9a4d7c30d82c13b86bfc4e987ce131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 09:30:23 +0100 Subject: [PATCH 168/179] Removed unnecessary/duplicate types from the instantiations of SegmentsTest --- src/UnitTests/Containers/Segments/SegmentsTest_CSR.h | 10 +--------- .../Containers/Segments/SegmentsTest_Ellpack.h | 10 +--------- .../Containers/Segments/SegmentsTest_SlicedEllpack.h | 10 +--------- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h index 81d4e9ff3..f2a3a1863 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h @@ -27,19 +27,11 @@ protected: // types for which MatrixTest is instantiated using CSRSegmentsTypes = ::testing::Types < - TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, - TNL::Containers::Segments::CSR< TNL::Devices::Host, long 
>, - TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, - TNL::Containers::Segments::CSR< TNL::Devices::Host, long >, TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, TNL::Containers::Segments::CSR< TNL::Devices::Host, long > #ifdef HAVE_CUDA ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long > #endif >; diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h index 7b5e90b23..7def8a732 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h @@ -27,19 +27,11 @@ protected: // types for which MatrixTest is instantiated using EllpackSegmentsTypes = ::testing::Types < - TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long >, TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long > #ifdef HAVE_CUDA ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long > #endif >; diff --git 
a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h index 1bcff3191..51131c7df 100644 --- a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h @@ -27,19 +27,11 @@ protected: // types for which MatrixTest is instantiated using SlicedEllpackSegmentsTypes = ::testing::Types < - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long >, TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long > #ifdef HAVE_CUDA ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, - TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long > #endif >; -- GitLab From dec4b38830fd9a5c0a2456f4c1c2ff7025a82175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 1 Mar 2020 09:35:31 +0100 Subject: [PATCH 169/179] Removed tests for sparse matrices which use short as IndexType to speed up the compilation --- src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h | 10 +--------- .../Matrices/BinarySparseMatrixTest_Ellpack.h | 10 +--------- .../Matrices/BinarySparseMatrixTest_SlicedEllpack.h | 10 +--------- .../Matrices/Legacy/SparseMatrixTest_AdEllpack.h | 10 +--------- .../Matrices/Legacy/SparseMatrixTest_BiEllpack.h | 10 +--------- 
src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h | 10 +--------- .../Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h | 10 +--------- .../Matrices/Legacy/SparseMatrixTest_Ellpack.h | 10 +--------- .../Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h | 10 +--------- src/UnitTests/Matrices/SparseMatrixTest_CSR.h | 10 +--------- src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h | 10 +--------- .../Matrices/SparseMatrixTest_SlicedEllpack.h | 10 +--------- 12 files changed, 12 insertions(+), 108 deletions(-) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h index 9cd52741a..a853281be 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -29,10 +29,6 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, @@ -42,11 +38,7 @@ using CSRMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< 
double, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h index 708bd85f0..3c0a65cfd 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -40,10 +40,6 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< float, 
TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, @@ -53,11 +49,7 @@ using EllpackMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h index 
7ebc25968..98c5f65ae 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -40,10 +40,6 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, @@ -53,11 +49,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - 
TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h index d2d268dac..8e07205e5 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h @@ -27,10 +27,6 @@ protected: // types for which MatrixTest is instantiated using AdEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::AdEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< double, TNL::Devices::Host, short >, TNL::Matrices::AdEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::AdEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::AdEllpack< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using AdEllpackMatrixTypes = ::testing::Types TNL::Matrices::AdEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::AdEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::AdEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::AdEllpack< int, 
TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h index 9dab63c1a..c38648107 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h @@ -27,10 +27,6 @@ protected: // types for which MatrixTest is instantiated using BiEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::BiEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< double, TNL::Devices::Host, short >, TNL::Matrices::BiEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::BiEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::BiEllpack< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using BiEllpackMatrixTypes = ::testing::Types TNL::Matrices::BiEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::BiEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h index 3cae12e3a..13c1ed6e0 100644 --- 
a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h @@ -27,10 +27,6 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::CSR< int, TNL::Devices::Host, short >, - TNL::Matrices::CSR< long, TNL::Devices::Host, short >, - TNL::Matrices::CSR< float, TNL::Devices::Host, short >, - TNL::Matrices::CSR< double, TNL::Devices::Host, short >, TNL::Matrices::CSR< int, TNL::Devices::Host, int >, TNL::Matrices::CSR< long, TNL::Devices::Host, int >, TNL::Matrices::CSR< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using CSRMatrixTypes = ::testing::Types TNL::Matrices::CSR< float, TNL::Devices::Host, long >, TNL::Matrices::CSR< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::CSR< int, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< long, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< float, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< double, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >, TNL::Matrices::CSR< long, TNL::Devices::Cuda, int >, TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >, TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h index a3c049910..5d304bde3 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h @@ -28,10 +28,6 @@ protected: // types for which MatrixTest is instantiated using ChEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< double, 
TNL::Devices::Host, short >, TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, int >, @@ -41,11 +37,7 @@ using ChEllpackMatrixTypes = ::testing::Types TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h index fa6b2027c..bb9fe4fc7 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h @@ -27,10 +27,6 @@ protected: // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::Ellpack< int, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< long, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< float, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< double, TNL::Devices::Host, short >, TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >, TNL::Matrices::Ellpack< long, TNL::Devices::Host, int >, TNL::Matrices::Ellpack< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using EllpackMatrixTypes = ::testing::Types TNL::Matrices::Ellpack< float, TNL::Devices::Host, long >, 
TNL::Matrices::Ellpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h index 7f5ad546f..8b3958384 100644 --- a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h @@ -32,10 +32,6 @@ using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32 // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - SlicedEllpackType< int, TNL::Devices::Host, short >, - SlicedEllpackType< long, TNL::Devices::Host, short >, - SlicedEllpackType< float, TNL::Devices::Host, short >, - SlicedEllpackType< double, TNL::Devices::Host, short >, SlicedEllpackType< int, TNL::Devices::Host, int >, SlicedEllpackType< long, TNL::Devices::Host, int >, SlicedEllpackType< float, TNL::Devices::Host, int >, @@ -45,11 +41,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types SlicedEllpackType< float, TNL::Devices::Host, long >, SlicedEllpackType< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,SlicedEllpackType< int, TNL::Devices::Cuda, short >, - SlicedEllpackType< long, TNL::Devices::Cuda, short >, - SlicedEllpackType< float, TNL::Devices::Cuda, short >, - SlicedEllpackType< double, TNL::Devices::Cuda, short >, - SlicedEllpackType< int, TNL::Devices::Cuda, int >, + 
,SlicedEllpackType< int, TNL::Devices::Cuda, int >, SlicedEllpackType< long, TNL::Devices::Cuda, int >, SlicedEllpackType< float, TNL::Devices::Cuda, int >, SlicedEllpackType< double, TNL::Devices::Cuda, int >, diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index f029c3bc7..a72d548f5 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -29,10 +29,6 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, @@ -42,11 +38,7 @@ using CSRMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, 
TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 2bf5fe20d..2a890e694 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -40,10 +40,6 @@ using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, In // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< long, 
TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, @@ -53,11 +49,7 @@ using EllpackMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 190839fd5..17b48dcf4 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -41,10 +41,6 @@ using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Devic // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types 
< - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, @@ -54,11 +50,7 @@ using SlicedEllpackMatrixTypes = ::testing::Types TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, 
TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, -- GitLab From 7e9a8ca0410dc0b4c8748f5e3c647c90d411b45d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 10:56:52 +0100 Subject: [PATCH 170/179] Restoration of protected members in Matrix. --- src/TNL/Matrices/Dense.hpp | 2 +- src/TNL/Matrices/Matrix.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 0d7037b1f..346c26ed8 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -918,7 +918,7 @@ operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealA this->setLike( matrix ); if( RowMajorOrder == RHSRowMajorOrder ) { - this->values = matrix.values; + this->values = matrix.getValues(); return *this; } diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index cf61f9efa..3c0fd8a9b 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -126,8 +126,7 @@ public: __cuda_callable__ Index getValuesSize() const; - // TODO: restore this - //protected: + protected: IndexType rows, columns, numberOfColors; -- GitLab From ba0375a7929c1956014954a0c4e8554ce4b42ed2 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 10:57:45 +0100 Subject: [PATCH 171/179] Code formatting in DenseMatrixView.hpp --- src/TNL/Matrices/DenseMatrixView.hpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 00ca5edc2..d61c50794 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -374,8 +374,9 @@ template< typename Real, bool RowMajorOrder > template< typename Vector > __cuda_callable__ -typename Vector::RealType 
DenseMatrixView< Real, Device, Index, RowMajorOrder >::rowVectorProduct( const IndexType row, - const Vector& vector ) const +typename Vector::RealType +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const { RealType sum( 0.0 ); // TODO: Fix this @@ -390,8 +391,9 @@ template< typename Real, bool RowMajorOrder > template< typename InVector, typename OutVector > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); @@ -416,9 +418,11 @@ template< typename Real, typename Index, bool RowMajorOrder > template< typename Matrix > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::addMatrix( const Matrix& matrix, - const RealType& matrixMultiplicator, - const RealType& thisMatrixMultiplicator ) +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) { TNL_ASSERT( this->getColumns() == matrix.getColumns() && this->getRows() == matrix.getRows(), -- GitLab From af032c5e55a8e7712447cd4794d27212f561561d Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 10:58:34 +0100 Subject: [PATCH 172/179] Moving CUDA kernels from DenseMatrixView.hpp to details/DenseMatrix.h --- src/TNL/Matrices/DenseMatrixView.hpp | 254 ------------------------- src/TNL/Matrices/details/DenseMatrix.h | 253 ++++++++++++++++++++++++ 2 files changed, 253 insertions(+), 254 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrixView.hpp 
b/src/TNL/Matrices/DenseMatrixView.hpp index d61c50794..50f30d889 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -437,107 +437,6 @@ addMatrix( const Matrix& matrix, this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; } -#ifdef HAVE_CUDA_______________ -template< typename Real, - typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename Matrix1, - typename Matrix2, - int tileDim, - int tileRowBlockSize > -__global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix, - const Matrix1* matrixA, - const Matrix2* matrixB, - const Real matrixAMultiplicator, - const Real matrixBMultiplicator, - const Index gridIdx_x, - const Index gridIdx_y ) -{ - /**** - * Here we compute product C = A * B. To profit from the fast - * shared memory we do it by tiles. - */ - - typedef Index IndexType; - typedef Real RealType; - __shared__ Real tileA[ tileDim*tileDim ]; - __shared__ Real tileB[ tileDim*tileDim ]; - __shared__ Real tileC[ tileDim*tileDim ]; - - const IndexType& matrixARows = matrixA->getRows(); - const IndexType& matrixAColumns = matrixA->getColumns(); - const IndexType& matrixBRows = matrixB->getRows(); - const IndexType& matrixBColumns = matrixB->getColumns(); - - /**** - * Reset the tile C - */ - for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) - tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0; - - /**** - * Compute the result tile coordinates - */ - const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim; - const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim; - - /**** - * Sum over the matrix tiles - */ - for( IndexType i = 0; i < matrixAColumns; i += tileDim ) - { - for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) - { - const IndexType matrixARow = resultTileRow + threadIdx.y + row; - const IndexType matrixAColumn = i + 
threadIdx.x; - if( matrixARow < matrixARows && matrixAColumn < matrixAColumns ) - tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] = - matrixAMultiplicator * matrixA->getElementFast( matrixARow, matrixAColumn ); - - const IndexType matrixBRow = i + threadIdx.y + row; - const IndexType matrixBColumn = resultTileColumn + threadIdx.x; - if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns ) - tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] = - matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn ); - } - __syncthreads(); - - const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow ); - const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i ); - const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i ); - const IndexType tileBLastColumn = - tnlCudaMin( tileDim, matrixBColumns - resultTileColumn ); - - for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize ) - { - RealType sum( 0.0 ); - for( IndexType j = 0; j < tileALastColumn; j++ ) - sum += tileA[ ( threadIdx.y + row )*tileDim + j ]* - tileB[ j*tileDim + threadIdx.x ]; - tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum; - } - __syncthreads(); - } - - /**** - * Write the result tile to the result matrix - */ - const IndexType& matrixCRows = resultMatrix->getRows(); - const IndexType& matrixCColumns = resultMatrix->getColumns(); - for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) - { - const IndexType matrixCRow = resultTileRow + row + threadIdx.y; - const IndexType matrixCColumn = resultTileColumn + threadIdx.x; - if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns ) - resultMatrix->setElementFast( matrixCRow, - matrixCColumn, - tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] ); - } - -} -#endif - template< typename Real, typename Device, typename Index, @@ -627,159 +526,6 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co } } -#ifdef 
HAVE_CUDA________________________ -template< typename Real, - typename Index, - typename Matrix, - bool RowMajorOrder, - typename RealAllocator, - int tileDim, - int tileRowBlockSize > -__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, - const Matrix* inputMatrix, - const Real matrixMultiplicator, - const Index gridIdx_x, - const Index gridIdx_y ) -{ - __shared__ Real tile[ tileDim*tileDim ]; - - const Index columns = inputMatrix->getColumns(); - const Index rows = inputMatrix->getRows(); - - - /**** - * Diagonal mapping of the CUDA blocks - */ - Index blockIdx_x, blockIdx_y; - if( columns == rows ) - { - blockIdx_y = blockIdx.x; - blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; - } - else - { - Index bID = blockIdx.x + gridDim.x*blockIdx.y; - blockIdx_y = bID % gridDim.y; - blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; - } - - /**** - * Read the tile to the shared memory - */ - const Index readRowPosition = - ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; - const Index readColumnPosition = - ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; - for( Index rowBlock = 0; - rowBlock < tileDim; - rowBlock += tileRowBlockSize ) - { - tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = - inputMatrix->getElementFast( readColumnPosition, - readRowPosition + rowBlock ); - } - __syncthreads(); - - /**** - * Write the tile to the global memory - */ - const Index writeRowPosition = - ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; - const Index writeColumnPosition = - ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; - for( Index rowBlock = 0; - rowBlock < tileDim; - rowBlock += tileRowBlockSize ) - { - resultMatrix->setElementFast( writeColumnPosition, - writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); - - } - -} - -template< typename Real, - 
typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename Matrix, - int tileDim, - int tileRowBlockSize > -__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, - const Matrix* inputMatrix, - const Real matrixMultiplicator, - const Index gridIdx_x, - const Index gridIdx_y ) -{ - __shared__ Real tile[ tileDim*tileDim ]; - - const Index columns = inputMatrix->getColumns(); - const Index rows = inputMatrix->getRows(); - - /**** - * Diagonal mapping of the CUDA blocks - */ - Index blockIdx_x, blockIdx_y; - if( columns == rows ) - { - blockIdx_y = blockIdx.x; - blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; - } - else - { - Index bID = blockIdx.x + gridDim.x*blockIdx.y; - blockIdx_y = bID % gridDim.y; - blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; - } - - /**** - * Read the tile to the shared memory - */ - const Index readRowPosition = - ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; - const Index readColumnPosition = - ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; - if( readColumnPosition < columns ) - { - const Index readOffset = readRowPosition * columns + readColumnPosition; - for( Index rowBlock = 0; - rowBlock < tileDim; - rowBlock += tileRowBlockSize ) - { - if( readRowPosition + rowBlock < rows ) - tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = - inputMatrix->getElementFast( readColumnPosition, - readRowPosition + rowBlock ); - } - } - __syncthreads(); - - /**** - * Write the tile to the global memory - */ - const Index writeRowPosition = - ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; - const Index writeColumnPosition = - ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; - if( writeColumnPosition < rows ) - { - const Index writeOffset = writeRowPosition * rows + writeColumnPosition; - for( Index rowBlock = 0; - rowBlock < tileDim; - rowBlock += tileRowBlockSize ) - { - if( writeRowPosition + 
rowBlock < columns ) - resultMatrix->setElementFast( writeColumnPosition, - writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); - } - } - -} - - -#endif template< typename Real, typename Device, diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h index 813e58bc4..96930b386 100644 --- a/src/TNL/Matrices/details/DenseMatrix.h +++ b/src/TNL/Matrices/details/DenseMatrix.h @@ -62,6 +62,259 @@ class DenseDeviceDependentCode< Devices::Cuda > } }; +#ifdef HAVE_CUDA +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix1, + typename Matrix2, + int tileDim, + int tileRowBlockSize > +__global__ void +DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix, + const Matrix1* matrixA, + const Matrix2* matrixB, + const Real matrixAMultiplicator, + const Real matrixBMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + /**** + * Here we compute product C = A * B. To profit from the fast + * shared memory we do it by tiles. 
+ */ + + typedef Index IndexType; + typedef Real RealType; + __shared__ Real tileA[ tileDim*tileDim ]; + __shared__ Real tileB[ tileDim*tileDim ]; + __shared__ Real tileC[ tileDim*tileDim ]; + + const IndexType& matrixARows = matrixA->getRows(); + const IndexType& matrixAColumns = matrixA->getColumns(); + const IndexType& matrixBRows = matrixB->getRows(); + const IndexType& matrixBColumns = matrixB->getColumns(); + + /**** + * Reset the tile C + */ + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0; + + /**** + * Compute the result tile coordinates + */ + const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim; + const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim; + + /**** + * Sum over the matrix tiles + */ + for( IndexType i = 0; i < matrixAColumns; i += tileDim ) + { + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixARow = resultTileRow + threadIdx.y + row; + const IndexType matrixAColumn = i + threadIdx.x; + if( matrixARow < matrixARows && matrixAColumn < matrixAColumns ) + tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixAMultiplicator * matrixA->getElementFast( matrixARow, matrixAColumn ); + + const IndexType matrixBRow = i + threadIdx.y + row; + const IndexType matrixBColumn = resultTileColumn + threadIdx.x; + if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns ) + tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn ); + } + __syncthreads(); + + const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow ); + const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i ); + const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i ); + const IndexType tileBLastColumn = + tnlCudaMin( tileDim, matrixBColumns - resultTileColumn ); + + for( IndexType 
row = 0; row < tileALastRow; row += tileRowBlockSize ) + { + RealType sum( 0.0 ); + for( IndexType j = 0; j < tileALastColumn; j++ ) + sum += tileA[ ( threadIdx.y + row )*tileDim + j ]* + tileB[ j*tileDim + threadIdx.x ]; + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum; + } + __syncthreads(); + } + + /**** + * Write the result tile to the result matrix + */ + const IndexType& matrixCRows = resultMatrix->getRows(); + const IndexType& matrixCColumns = resultMatrix->getColumns(); + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixCRow = resultTileRow + row + threadIdx.y; + const IndexType matrixCColumn = resultTileColumn + threadIdx.x; + if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns ) + resultMatrix->setElementFast( matrixCRow, + matrixCColumn, + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] ); + } + +} + +template< typename Real, + typename Index, + typename Matrix, + bool RowMajorOrder, + typename RealAllocator, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + 
threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + + } + +} + +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; + if( readColumnPosition < columns ) + { + const Index 
readOffset = readRowPosition * columns + readColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( readRowPosition + rowBlock < rows ) + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; + if( writeColumnPosition < rows ) + { + const Index writeOffset = writeRowPosition * rows + writeColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( writeRowPosition + rowBlock < columns ) + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + } + } + +} + +#endif + } //namespace details } //namepsace Matrices } //namespace TNL \ No newline at end of file -- GitLab From 41348dac910dad6d84ba9ebd111b0f05b038d942 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 10:59:17 +0100 Subject: [PATCH 173/179] Deleted unused dense matrix device dependent code. 
--- src/TNL/Matrices/DenseMatrixView.hpp | 46 ---------------------------- 1 file changed, 46 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 50f30d889..01415ec21 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -699,51 +699,5 @@ Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( co return this->segments.getGlobalIndex( row, column ); } -/*template<> -class DenseDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Real, - typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename InVector, - typename OutVector > - static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class DenseDeviceDependentCode< Devices::Cuda > -{ - public: - - typedef Devices::Cuda Device; - - template< typename Real, - typename Index, - bool RowMajorOrder, - typename RealAllocator, - typename InVector, - typename OutVector > - static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -};*/ - } // namespace Matrices } // namespace TNL -- GitLab From 939b17f9c5bac758cfd35bc57a48f4342f58fa64 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 13:21:17 +0100 Subject: [PATCH 174/179] Removed useless virtual methods from Matrix. 
--- src/TNL/Matrices/Legacy/AdEllpack.h | 33 +++++++------- src/TNL/Matrices/Legacy/AdEllpack_impl.h | 10 +++++ src/TNL/Matrices/Legacy/BiEllpack.h | 15 ++++--- src/TNL/Matrices/Legacy/BiEllpack_impl.h | 46 ++++++++++++-------- src/TNL/Matrices/Legacy/CSR.h | 11 +++-- src/TNL/Matrices/Legacy/CSR_impl.h | 22 +++++++--- src/TNL/Matrices/Legacy/Ellpack.h | 13 +++--- src/TNL/Matrices/Legacy/Ellpack_impl.h | 22 +++++++--- src/TNL/Matrices/Legacy/SlicedEllpack.h | 3 ++ src/TNL/Matrices/Legacy/SlicedEllpack_impl.h | 13 +++++- src/TNL/Matrices/Matrix.h | 31 ++----------- src/TNL/Matrices/Matrix.hpp | 21 --------- 12 files changed, 130 insertions(+), 110 deletions(-) diff --git a/src/TNL/Matrices/Legacy/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h index 3d2db7b96..1135084ee 100644 --- a/src/TNL/Matrices/Legacy/AdEllpack.h +++ b/src/TNL/Matrices/Legacy/AdEllpack.h @@ -10,8 +10,8 @@ /**** * This class implements AdELL format from: - * - * Maggioni M., Berger-Wolf T., + * + * Maggioni M., Berger-Wolf T., * AdELL: An Adaptive Warp-Balancing ELL Format for Efficient Sparse Matrix-Vector Multiplication on GPUs, * In proceedings of 42nd International Conference on Parallel Processing, 2013. 
*/ @@ -33,7 +33,7 @@ struct warpInfo using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; - + IndexType offset; IndexType rowOffset; IndexType localLoad; @@ -47,7 +47,7 @@ template< typename MatrixType > class warpList { public: - + using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; @@ -74,7 +74,7 @@ public: { return this->tail; } ~warpList(); - + void printList() { if( this->getHead() == this->getTail() ) @@ -114,7 +114,7 @@ private: // friend class will be needed for templated assignment operators template< typename Real2, typename Device2, typename Index2 > friend class AdEllpack; - + public: typedef Real RealType; @@ -122,6 +122,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; template< typename _Real = Real, typename _Device = Device, @@ -132,6 +133,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getWarp( const IndexType row ) const; IndexType getInWarpOffset( const IndexType row, @@ -143,7 +146,7 @@ public: void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix ); void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const; @@ -186,7 +189,7 @@ public: typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - + // copy assignment 
AdEllpack& operator=( const AdEllpack& matrix ); @@ -194,7 +197,7 @@ public: template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix ); - + void save( File& file ) const; void load( File& file ); @@ -242,29 +245,29 @@ public: void spmvCuda4( const InVector& inVector, OutVector& outVector, const int gridIdx ) const; - + template< typename InVector, typename OutVector > __device__ void spmvCuda8( const InVector& inVector, OutVector& outVector, const int gridIdx ) const; - + template< typename InVector, typename OutVector > __device__ void spmvCuda16( const InVector& inVector, OutVector& outVector, - const int gridIdx ) const; + const int gridIdx ) const; template< typename InVector, typename OutVector > __device__ void spmvCuda32( const InVector& inVector, OutVector& outVector, - const int gridIdx ) const; - - + const int gridIdx ) const; + + #endif diff --git a/src/TNL/Matrices/Legacy/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h index 234e18f94..242a3c81f 100644 --- a/src/TNL/Matrices/Legacy/AdEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h @@ -220,6 +220,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) } } +template< typename Real, + typename Device, + typename Index > +void AdEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Legacy/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h index fe3fd9e11..1a92581c7 100644 --- a/src/TNL/Matrices/Legacy/BiEllpack.h +++ b/src/TNL/Matrices/Legacy/BiEllpack.h @@ -32,7 +32,7 @@ 
template< typename Real, typename Device, typename Index > class BiEllpack : public Sparse< Real, Device, Index > { private: - + // convenient template alias for controlling the selection of copy-assignment operator template< typename Device2 > using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; @@ -40,13 +40,14 @@ private: // friend class will be needed for templated assignment operators template< typename Real2, typename Device2, typename Index2 > friend class BiEllpack; - + public: typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; @@ -62,15 +63,17 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; template< typename Real2, typename Device2, typename Index2 > void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix ); - + void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const; @@ -142,7 +145,7 @@ public: IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; - + // copy assignment BiEllpack& operator=( const BiEllpack& matrix ); @@ -160,7 +163,7 @@ public: void load( const String& fileName ); void print( std::ostream& str ) const; - + void 
printValues() const; void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths ); diff --git a/src/TNL/Matrices/Legacy/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h index 36732a39a..6db2ed609 100644 --- a/src/TNL/Matrices/Legacy/BiEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h @@ -78,9 +78,9 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) CompressedRowLengthsVector rowLengths; rowLengths.reset(); rowLengths.setLike( constRowLengths ); - + rowLengths = constRowLengths; - + if( this->getRows() % this->warpSize != 0 ) this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) ); else @@ -88,7 +88,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) IndexType strips = this->virtualRows / this->warpSize; this->rowPermArray.setSize( this->rows ); this->groupPointers.setSize( strips * ( this->logWarpSize + 1 ) + 1 ); - + this->groupPointers.setValue( 0 ); DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); @@ -103,6 +103,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) return this->allocateMatrixElements( this->warpSize * this->groupPointers.getElement( strips * ( this->logWarpSize + 1 ) ) ); } +template< typename Real, + typename Device, + typename Index > +void BiEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -128,7 +138,7 @@ Index BiEllpack< Real, Device, Index >::getNumberOfGroups( const IndexType row ) IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * 
strip; IndexType numberOfGroups = this->logWarpSize + 1; - IndexType bisection = 1; + IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) { if( rowStripPermutation < bisection ) @@ -148,7 +158,7 @@ template< typename Real, typename Index > Index BiEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const { - TNL_ASSERT( row >= 0 && row < this->getRows(), + TNL_ASSERT( row >= 0 && row < this->getRows(), std::cerr << "row = " << row << " this->getRows() = " << this->getRows() ); const IndexType strip = row / this->warpSize; @@ -182,7 +192,7 @@ template< typename Real, typename Device2, typename Index2 > void BiEllpack< Real, Device, Index >::setLike( const BiEllpack< Real2, Device2, Index2 >& matrix ) -{ +{ Sparse< Real, Device, Index >::setLike( matrix ); this->rowPermArray.setLike( matrix.rowPermArray ); this->groupPointers.setLike( matrix.groupPointers ); @@ -212,9 +222,9 @@ bool BiEllpack< Real, Device, Index >::operator == ( const BiEllpack< Real2, Dev << " matrix.getRows() = " << matrix.getRows() << " this->getColumns() = " << this->getColumns() << " matrix.getColumns() = " << matrix.getColumns() ); - + TNL_ASSERT_TRUE( false, "operator == is not yet implemented for BiEllpack."); - + // TODO: implement this return false; } @@ -284,10 +294,10 @@ bool BiEllpack< Real, Device, Index >::addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - const IndexType strip = row / this->warpSize; - const IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize; - IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm; + const IndexType strip = row / this->warpSize; + const IndexType groupBegin = strip * ( this->logWarpSize + 1 ); + const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize; + IndexType elementPtr 
= this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm; IndexType rowMultiplicator = 1; IndexType step = this->warpSize; @@ -685,7 +695,7 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In "unknown device" ); static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, "unknown device" ); - + this->setLike( matrix ); this->values = matrix.values; this->columnIndexes = matrix.columnIndexes; @@ -777,14 +787,14 @@ void BiEllpack< Real, Device, Index >::printValues() const { for( Index i = 0; i < this->values.getSize(); i++ ) { if( this->columnIndexes.getElement( i ) != this->getColumns() ) - std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) + std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl; } - + for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) { std::cout << "rowPermArray[ " << i << " ] = " << this->rowPermArray.getElement( i ) << std::endl; } - + for( Index i = 0; i < this->groupPointers.getSize(); i++ ) { std::cout << "groupPointers[ " << i << " ] = " << this->groupPointers.getElement( i ) << std::endl; } @@ -1146,7 +1156,7 @@ void BiEllpack< Real, Device, Index >::spmvCuda( const InVector& inVector, __syncthreads(); if( warpStart + inWarpIdx >= this->getRows() ) return; - + outVector[ warpStart + inWarpIdx ] = results[ this->rowPermArray[ warpStart + inWarpIdx ] & ( cudaBlockSize - 1 ) ]; } #endif @@ -1321,7 +1331,7 @@ public: const Index begin = matrix.groupPointers.getElement( groupBegin ) * matrix.warpSize + rowStripPerm * stripLength; Index elementPtr = begin; Index rowLength = 0; - + for( Index group = 0; group < matrix.getNumberOfGroups( row ); group++ ) { for( Index i = 0; i < matrix.getGroupLength( strip, group ); i++ ) diff --git a/src/TNL/Matrices/Legacy/CSR.h 
b/src/TNL/Matrices/Legacy/CSR.h index b68434252..a31f3ee76 100644 --- a/src/TNL/Matrices/Legacy/CSR.h +++ b/src/TNL/Matrices/Legacy/CSR.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once #include #include @@ -18,7 +18,7 @@ namespace TNL { namespace Matrices { - + #ifdef HAVE_UMFPACK template< typename Matrix, typename Preconditioner > class UmfpackWrapper; @@ -48,6 +48,7 @@ public: using DeviceType = Device; using IndexType = Index; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef Sparse< Real, Device, Index > BaseType; using MatrixRow = typename BaseType::MatrixRow; @@ -71,13 +72,15 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; IndexType getNonZeroRowLength( const IndexType row ) const; - + __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; @@ -264,7 +267,7 @@ protected: int cudaWarpSize, hybridModeSplit; typedef CSRDeviceDependentCode< DeviceType > DeviceDependentCode; - + friend class CSRDeviceDependentCode< DeviceType >; friend class CusparseCSR< RealType >; }; diff --git a/src/TNL/Matrices/Legacy/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h index 08b35f563..5fec923f0 100644 --- a/src/TNL/Matrices/Legacy/CSR_impl.h +++ b/src/TNL/Matrices/Legacy/CSR_impl.h @@ -20,7 +20,7 @@ #endif namespace TNL { -namespace Matrices { +namespace Matrices { #ifdef HAVE_CUSPARSE template< typename Real, typename Index > @@ -99,6 
+99,16 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng this->columnIndexes.setValue( this->columns ); } +template< typename Real, + typename Device, + typename Index > +void CSR< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -131,7 +141,7 @@ template< typename Real, typename Index > __cuda_callable__ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const -{ +{ ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } @@ -884,10 +894,10 @@ template<> class tnlCusparseCSRWrapper< float, int > { public: - + typedef float Real; typedef int Index; - + static void vectorProduct( const Index rows, const Index columns, const Index nnz, @@ -924,10 +934,10 @@ template<> class tnlCusparseCSRWrapper< double, int > { public: - + typedef double Real; typedef int Index; - + static void vectorProduct( const Index rows, const Index columns, const Index nnz, diff --git a/src/TNL/Matrices/Legacy/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h index 5f6e666f9..eea58b757 100644 --- a/src/TNL/Matrices/Legacy/Ellpack.h +++ b/src/TNL/Matrices/Legacy/Ellpack.h @@ -14,7 +14,7 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class EllpackDeviceDependentCode; @@ -37,6 +37,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType 
>::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef Sparse< Real, Device, Index > BaseType; @@ -59,20 +60,22 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + void setConstantCompressedRowLengths( const IndexType& rowLengths ); IndexType getRowLength( const IndexType row ) const; __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; - + IndexType getNonZeroRowLength( const IndexType row ) const; template< typename Real2, typename Device2, typename Index2 > void setLike( const Ellpack< Real2, Device2, Index2 >& matrix ); void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const Ellpack< Real2, Device2, Index2 >& matrix ) const; @@ -175,9 +178,9 @@ public: const Vector& old_x, Vector& x, const RealType& omega ) const; - + // copy assignment - Ellpack& operator=( const Ellpack& matrix ); + Ellpack& operator=( const Ellpack& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, diff --git a/src/TNL/Matrices/Legacy/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h index 656c3f7c2..04ca10385 100644 --- a/src/TNL/Matrices/Legacy/Ellpack_impl.h +++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h @@ -33,7 +33,7 @@ String Ellpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::Ellpack< " ) + String( TNL::getType< Real >() ) + - ", [any device], " + + ", [any device], " + getType< Index >() + String( " >" ); } @@ -66,7 +66,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows, IndexType missingRows = this->rows - this->alignedRows; missingRows = roundToMultiple( missingRows, 
Cuda::getWarpSize() ); - + this->alignedRows += missingRows; } } @@ -86,10 +86,20 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" ); this->rowLengths = this->maxRowLength = max( rowLengths ); - + allocateElements(); } +template< typename Real, + typename Device, + typename Index > +void Ellpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -769,13 +779,13 @@ template< typename Real, void Ellpack< Real, Device, Index >::allocateElements() { IndexType numMtxElmnts = this->alignedRows * this->rowLengths; - + if( this->alignedRows != 0 ) { - TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, + TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, "Ellpack cannot store this matrix. 
The number of matrix elements has overflown the value that IndexType is capable of storing" ); } - + Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths ); } diff --git a/src/TNL/Matrices/Legacy/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h index b79913b23..63b433087 100644 --- a/src/TNL/Matrices/Legacy/SlicedEllpack.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h @@ -66,6 +66,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef Sparse< Real, Device, Index > BaseType; @@ -89,6 +90,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ diff --git a/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h index bfba092ff..8673a02c5 100644 --- a/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h @@ -16,7 +16,7 @@ #include namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Real, typename Device, @@ -83,6 +83,17 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } +template< typename Real, + typename Device, + typename Index, + int SliceSize > +void SlicedEllpack< Real, 
Device, Index, SliceSize >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 3c0fd8a9b..4fce8358d 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -47,26 +47,15 @@ public: const IndexType columns, const RealAllocatorType& allocator = RealAllocatorType() ); - virtual void setDimensions( const IndexType rows, - const IndexType columns ); - - virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0; - - [[deprecated]] - virtual IndexType getRowLength( const IndexType row ) const = 0; - - // TODO: implementation is not parallel - // TODO: it would be nice if padding zeros could be stripped - //void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; - - virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + void setDimensions( const IndexType rows, + const IndexType columns ); template< typename Matrix_ > void setLike( const Matrix_& matrix ); IndexType getAllocatedElementsCount() const; - virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; + IndexType getNumberOfNonzeroMatrixElements() const; void reset(); @@ -76,20 +65,6 @@ public: __cuda_callable__ IndexType getColumns() const; - //virtual TODO: uncomment - void setElement( const IndexType row, - const IndexType column, - const RealType& value );// = 0; - - //virtual TODO: uncomment - void addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 );// = 0; - - virtual Real getElement( const IndexType row, - const IndexType column ) const = 0; - 
const ValuesVectorType& getValues() const; ValuesVectorType& getValues(); diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 0236f94f7..84dc6ef47 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -56,27 +56,6 @@ void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexTyp this->columns = columns; } -/*template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const -{ - rowLengths.setSize( this->getRows() ); - getCompressedRowLengths( rowLengths.getView() ); -}*/ - -template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -void Matrix< Real, Device, Index, RealAllocator >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const -{ - TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); - for( IndexType row = 0; row < this->getRows(); row++ ) - rowLengths.setElement( row, this->getRowLength( row ) ); -} - template< typename Real, typename Device, typename Index, -- GitLab From 8d13b8225bccc41087257f961e8f516bfbfaed75 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 13:35:37 +0100 Subject: [PATCH 175/179] Matrix methods for coloring marked as deprecated, method Matrix::help was erased. 
--- src/TNL/Matrices/Matrix.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 4fce8358d..48e6ebf7c 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -84,16 +84,14 @@ public: // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] __cuda_callable__ const IndexType& getNumberOfColors() const; // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] void computeColorsVector(Containers::Vector &colorsVector); - // TODO: what is this supposed to do?!? There are redefinitions only in the - // EllpackSymmetricGraph and SlicedEllpackSymmetricGraph classes... - bool help( bool verbose = false ) { return true;}; - // TODO: copy should be done in the operator= and it should work the other way too void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix ); @@ -103,7 +101,10 @@ public: protected: - IndexType rows, columns, numberOfColors; + IndexType rows, columns; + + // TODO: remove1 + IndexType numberOfColors; ValuesVectorType values; }; -- GitLab From 5777de50e8e7bedddcc688a7a88e9f333cdfeef6 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber Date: Tue, 3 Mar 2020 16:37:46 +0100 Subject: [PATCH 176/179] Deleted unused method Matrix::getValuesSize(). --- src/TNL/Matrices/Matrix.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 48e6ebf7c..8e467b004 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -96,8 +96,8 @@ public: void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix ); // TODO: missing implementation! 
- __cuda_callable__ - Index getValuesSize() const; + //__cuda_callable__ + //Index getValuesSize() const; protected: -- GitLab From 5b34b208296251963c1f3d010c6e3b69b4ade2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 3 Mar 2020 17:20:28 +0100 Subject: [PATCH 177/179] Deleted useless methods in Matrix and MatrixView. --- src/TNL/Matrices/Matrix.h | 9 +--- src/TNL/Matrices/Matrix.hpp | 15 ------- src/TNL/Matrices/MatrixView.h | 35 ---------------- src/TNL/Matrices/MatrixView.hpp | 74 --------------------------------- 4 files changed, 1 insertion(+), 132 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 8e467b004..9ce1a109a 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -92,18 +92,11 @@ public: [[deprecated]] void computeColorsVector(Containers::Vector &colorsVector); - // TODO: copy should be done in the operator= and it should work the other way too - void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix ); - - // TODO: missing implementation! 
- //__cuda_callable__ - //Index getValuesSize() const; - protected: IndexType rows, columns; - // TODO: remove1 + // TODO: remove IndexType numberOfColors; ValuesVectorType values; diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index 84dc6ef47..b7e000670 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -250,21 +250,6 @@ computeColorsVector(Containers::Vector &colorsVector) } } -template< typename Real, - typename Device, - typename Index, - typename RealAllocator > -void -Matrix< Real, Device, Index, RealAllocator >:: -copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix ) -{ - this->numberOfColors = matrix.getNumberOfColors(); - this->columns = matrix.getColumns(); - this->rows = matrix.getRows(); - - this->values.setSize( matrix.getValuesSize() ); -} - #ifdef HAVE_CUDA template< typename Matrix, typename InVector, diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index cd1beda9c..d52b9a24a 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -49,14 +49,6 @@ public: __cuda_callable__ MatrixView( const MatrixView& view ) = default; - virtual IndexType getRowLength( const IndexType row ) const = 0; - - // TODO: implementation is not parallel - // TODO: it would be nice if padding zeros could be stripped - void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; - - virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; - IndexType getAllocatedElementsCount() const; virtual IndexType getNumberOfNonzeroMatrixElements() const; @@ -67,24 +59,6 @@ public: __cuda_callable__ IndexType getColumns() const; - /**** - * TODO: The fast variants of the following methods cannot be virtual. - * If they were, they could not be used in the CUDA kernels. If CUDA allows it - * in the future and it does not slow down, declare them as virtual here. 
- */ - - virtual void setElement( const IndexType row, - const IndexType column, - const RealType& value ) = 0; - - virtual void addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ) = 0; - - virtual Real getElement( const IndexType row, - const IndexType column ) const = 0; - __cuda_callable__ const ValuesView& getValues() const; @@ -135,15 +109,6 @@ std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, I return str; } -/* -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ); -*/ - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp index 9fd73e519..dfac8f3af 100644 --- a/src/TNL/Matrices/MatrixView.hpp +++ b/src/TNL/Matrices/MatrixView.hpp @@ -42,29 +42,6 @@ MatrixView( const IndexType rows_, { } -template< typename Real, - typename Device, - typename Index > -void -MatrixView< Real, Device, Index >:: -getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const -{ - rowLengths.setSize( this->getRows() ); - getCompressedRowLengths( rowLengths.getView() ); -} - -template< typename Real, - typename Device, - typename Index > -void -MatrixView< Real, Device, Index >:: -getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const -{ - TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); - for( IndexType row = 0; row < this->getRows(); row++ ) - rowLengths.setElement( row, this->getRowLength( row ) ); -} - template< typename Real, typename Device, typename Index > @@ -244,56 +221,5 @@ computeColorsVector(Containers::Vector &colorsVector) } } -/* -#ifdef HAVE_CUDA -template< typename Matrix, - typename InVector, - typename OutVector > -__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, - 
const InVector* inVector, - OutVector* outVector, - int gridIdx ) -{ - static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < matrix->getRows() ) - ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); -} -#endif - -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_this, - kernel_inVector, - kernel_outVector, - gridIdx ); - TNL_CHECK_CUDA_DEVICE; - } - Cuda::freeFromDevice( kernel_this ); - Cuda::freeFromDevice( kernel_inVector ); - Cuda::freeFromDevice( kernel_outVector ); - TNL_CHECK_CUDA_DEVICE; -#endif -} -*/ - } // namespace Matrices } // namespace TNL -- GitLab From aeb3be4c38d021feeec5e7eca9e9a20e6c456a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 3 Mar 2020 17:21:12 +0100 Subject: [PATCH 178/179] Marked methods related to matrix coloring as deprecated. They will be moved to some other place. 
--- src/TNL/Matrices/MatrixView.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h index d52b9a24a..895510181 100644 --- a/src/TNL/Matrices/MatrixView.h +++ b/src/TNL/Matrices/MatrixView.h @@ -89,10 +89,12 @@ public: // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] __cuda_callable__ const IndexType& getNumberOfColors() const; // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] void computeColorsVector(Containers::Vector &colorsVector); protected: -- GitLab From 456375d0c8dd4caeb1d64776e81b45f0c48880e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Tue, 3 Mar 2020 17:41:33 +0100 Subject: [PATCH 179/179] Deleted unused method Matrix::MatrixVectorProductCuda. --- src/TNL/Matrices/Matrix.h | 7 ------ src/TNL/Matrices/Matrix.hpp | 49 ------------------------------------- 2 files changed, 56 deletions(-) diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 9ce1a109a..129a54cbe 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -109,13 +109,6 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index return str; } -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ); - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index b7e000670..ce5f52274 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector &colorsVector) } } -#ifdef HAVE_CUDA -template< typename Matrix, - typename InVector, - typename OutVector > -__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, - const InVector* inVector, - OutVector* outVector, - int gridIdx ) -{ - 
static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < matrix->getRows() ) - ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); -} -#endif - -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_this, - kernel_inVector, - kernel_outVector, - gridIdx ); - TNL_CHECK_CUDA_DEVICE; - } - Cuda::freeFromDevice( kernel_this ); - Cuda::freeFromDevice( kernel_inVector ); - Cuda::freeFromDevice( kernel_outVector ); - TNL_CHECK_CUDA_DEVICE; -#endif -} - } // namespace Matrices } // namespace TNL -- GitLab