Loading src/TNL/Containers/Segments/CSR.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -218,7 +218,7 @@ void CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; Loading src/TNL/Containers/Segments/SlicedEllpack.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes ) const auto sizes_view = sizes.getConstView(); auto slices_view = this->sliceOffsets.getView(); auto slice_segment_size_view = this->sliceSegmentSizes.getView(); auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { if( globalIdx < _size ) return sizes_view[ globalIdx ]; return 0; Loading Loading @@ -341,7 +341,7 @@ void SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading src/TNL/Matrices/Dense.h +3 −13 Original line number Diff line number Diff line Loading @@ -30,15 +30,6 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { private: // convenient template alias for controlling the selection of copy-assignment operator template< typename Device2 > using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; // friend class will be needed for templated assignment operators //template< typename Real2, typename Device2, typename Index2 > //friend class Dense; public: using RealType = Real; using DeviceType = Device; Loading Loading @@ -176,12 +167,11 @@ class Dense : public Matrix< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment Dense& operator=( const Dense& matrix ); //Dense& operator=( const Dense& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; Loading src/TNL/Matrices/Dense.hpp +13 −27 Original line number Diff line number Diff line Loading @@ -373,7 +373,7 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); Loading Loading @@ -959,39 +959,25 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } // copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix ) Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { if( RowMajorOrder == RowMajorOrder_ ) { this->setLike( matrix ); this->values = matrix.values; return *this; this->values = matrix.getValues(); } // cross-device copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real2, typename Device2, typename Index2, typename > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) else { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, "unknown device" ); this->setLike( matrix ); throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); } } template< typename Real, Loading src/TNL/Matrices/SparseMatrix.h +1 −0 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include <TNL/Containers/Segments/CSR.h> #include <TNL/Matrices/SparseMatrixRowView.h> #include <TNL/Matrices/SparseMatrixView.h> #include <TNL/Matrices/Dense.h> namespace TNL { namespace Matrices { Loading Loading
src/TNL/Containers/Segments/CSR.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -218,7 +218,7 @@ void CSR< Device, Index, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto offsetsView = this->offsets.getConstView(); auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { const IndexType begin = offsetsView[ i ]; Loading
src/TNL/Containers/Segments/SlicedEllpack.hpp +2 −2 Original line number Diff line number Diff line Loading @@ -127,7 +127,7 @@ setSegmentsSizes( const SizesHolder& sizes ) const auto sizes_view = sizes.getConstView(); auto slices_view = this->sliceOffsets.getView(); auto slice_segment_size_view = this->sliceSegmentSizes.getView(); auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType globalIdx, bool& compute ) -> IndexType { auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { if( globalIdx < _size ) return sizes_view[ globalIdx ]; return 0; Loading Loading @@ -341,7 +341,7 @@ void SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); if( RowMajorOrder ) Loading
src/TNL/Matrices/Dense.h +3 −13 Original line number Diff line number Diff line Loading @@ -30,15 +30,6 @@ template< typename Real = double, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { private: // convenient template alias for controlling the selection of copy-assignment operator template< typename Device2 > using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >; // friend class will be needed for templated assignment operators //template< typename Real2, typename Device2, typename Index2 > //friend class Dense; public: using RealType = Real; using DeviceType = Device; Loading Loading @@ -176,12 +167,11 @@ class Dense : public Matrix< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment Dense& operator=( const Dense& matrix ); //Dense& operator=( const Dense& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ > Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix ); template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; Loading
src/TNL/Matrices/Dense.hpp +13 −27 Original line number Diff line number Diff line Loading @@ -373,7 +373,7 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool { function( rowIdx, columnIdx, values_view[ globalIdx ] ); function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); return true; }; this->segments.forSegments( first, last, f ); Loading Loading @@ -959,39 +959,25 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } // copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense& matrix ) Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) { if( RowMajorOrder == RowMajorOrder_ ) { this->setLike( matrix ); this->values = matrix.values; return *this; this->values = matrix.getValues(); } // cross-device copy assignment template< typename Real, typename Device, typename Index, bool RowMajorOrder, typename RealAllocator > template< typename Real2, typename Device2, typename Index2, typename > Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) else { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, "unknown device" ); this->setLike( matrix ); throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); } } template< typename Real, Loading
src/TNL/Matrices/SparseMatrix.h +1 −0 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include <TNL/Containers/Segments/CSR.h> #include <TNL/Matrices/SparseMatrixRowView.h> #include <TNL/Matrices/SparseMatrixView.h> #include <TNL/Matrices/Dense.h> namespace TNL { namespace Matrices { Loading