diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index e5a8d9819aa7e3c8fb31eecd62ba4932b6c1c731..4c1903f6c38fd2990ca32c5250e882dedbc745c3 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -486,7 +486,7 @@ struct LinearSolversBenchmark DeviceType, IndexType, TNL::Matrices::GeneralMatrix, - Algorithms::Segments::CSR + Algorithms::Segments::CSRDefault >; SharedPointer< CSR > matrixCopy; Matrices::copySparseMatrix( *matrixCopy, *matrixPointer ); diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h index ec0fd001860959efa0492e3a4c8497948ab5c010..3416ad3eff73bb9e0567623f68c83d08d84f72bb 100644 --- a/src/Benchmarks/SpMV/spmv-legacy.h +++ b/src/Benchmarks/SpMV/spmv-legacy.h @@ -49,7 +49,7 @@ using SlicedEllpackAlias = Matrices::Legacy::SlicedEllpack< Real, Device, Index // Segments based sparse matrix aliases template< typename Real, typename Device, typename Index > -using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSR >; +using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault >; template< typename Device, typename Index, typename IndexAllocator > using EllpackSegments = Algorithms::Segments::Ellpack< Device, Index, IndexAllocator >; diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h index 9d2b84b618f835e1578b5441fbfe9cbbdddd8033..ef958a252bc533b84da9da6c3516129362f068ae 100644 --- a/src/TNL/Algorithms/Segments/CSR.h +++ b/src/TNL/Algorithms/Segments/CSR.h @@ -22,6 +22,7 @@ namespace TNL { template< typename Device, typename Index, + CSRKernelTypes KernelType_ = CSRScalarKernel, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > class CSR { @@ -36,6 +37,7 @@ class CSR using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + CSRKernelTypes KernelType = KernelType_; CSR(); @@ -114,8 +116,8 @@ class CSR CSR& operator=( const CSR& rhsSegments ) = default; - template< typename Device_, typename Index_, typename IndexAllocator_ > - CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ); + template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > + CSR& operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ); void save( File& file ) const; @@ -125,6 +127,28 @@ class CSR OffsetsHolder offsets; }; + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +using CSRScalar = CSR< Device, Index, CSRScalarKernel, IndexAllocator >; + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +using CSRVector = CSR< Device, Index, CSRVectorKernel, IndexAllocator >; + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +using CSRLight = CSR< Device, Index, CSRLightKernel, IndexAllocator >; + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +using CSRDefault = CSRScalar< Device, Index, IndexAllocator >; + + } // namespace Segments } // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Algorithms/Segments/CSR.hpp b/src/TNL/Algorithms/Segments/CSR.hpp index a6b915db343ba840e5396796531f4af36ae67071..48e82de41a2169b79840bd4e19d9d4a834c7379c 100644 --- a/src/TNL/Algorithms/Segments/CSR.hpp +++ b/src/TNL/Algorithms/Segments/CSR.hpp @@ -22,16 +22,18 @@ namespace TNL { template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: CSR() { } template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const SegmentsSizes& segmentsSizes ) { this->setSegmentsSizes( segmentsSizes ); @@ -39,16 +41,18 @@ CSR( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR& csr ) : offsets( csr.offsets ) { } template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) { @@ -56,9 +60,10 @@ CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > String -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -66,9 +71,10 @@ getSerializationType() template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > String -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsType() { return ViewType::getSegmentsType(); @@ -76,10 +82,11 @@ getSegmentsType() template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > template< typename SizesHolder > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: setSegmentsSizes( const SizesHolder& sizes ) { details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); @@ -87,9 +94,10 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: reset() { this->offsets.setSize( 1 ); @@ -99,9 +107,10 @@ reset() template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -typename CSR< Device, Index, IndexAllocator >::ViewType -CSR< Device, Index, IndexAllocator >:: +typename CSR< Device, Index, KernelType_, IndexAllocator >::ViewType +CSR< Device, Index, KernelType_, IndexAllocator >:: getView() { return ViewType( this->offsets.getView() ); @@ -109,9 +118,10 @@ getView() template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > auto -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); @@ -119,8 +129,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -__cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: +__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; @@ -128,8 +139,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -__cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: +__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); @@ -137,8 +149,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -__cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: +__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getSize() const -> IndexType { return this->getStorageSize(); @@ -146,8 +159,9 @@ getSize() const -> IndexType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -__cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: +__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); @@ -155,8 +169,9 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -__cuda_callable__ auto CSR< Device, Index, IndexAllocator >:: +__cuda_callable__ auto CSR< Device, Index, KernelType_, IndexAllocator >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -172,10 +187,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > __cuda_callable__ auto -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); @@ -183,10 +199,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Function, typename... Args > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); @@ -194,10 +211,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator> template< typename Function, typename... Args > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -205,10 +223,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); @@ -216,10 +235,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); @@ -227,11 +247,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > - template< typename Device_, typename Index_, typename IndexAllocator_ > -CSR< Device, Index, IndexAllocator >& -CSR< Device, Index, IndexAllocator >:: -operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) + template< typename Device_, typename Index_, CSRKernelTypes KernelType__, typename IndexAllocator_ > +CSR< Device, Index, KernelType_, IndexAllocator >& +CSR< Device, Index, KernelType_, IndexAllocator >:: +operator=( const CSR< Device_, Index_, KernelType__, IndexAllocator_ >& source ) { this->offsets = source.offsets; return *this; @@ -239,9 +260,10 @@ operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > void -CSR< Device, Index, IndexAllocator >:: +CSR< Device, Index, KernelType_, IndexAllocator >:: save( File& file ) const { file << this->offsets; @@ -249,9 +271,10 @@ save( File& file ) const template< typename Device, typename Index, + CSRKernelTypes KernelType_, typename IndexAllocator > -void -CSR< Device, Index, IndexAllocator >:: +void +CSR< Device, Index, KernelType_, IndexAllocator >:: load( File& file ) { file >> this->offsets; diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h index 610864f5e13921e6d81b775ebf7b233ea8b6b3e9..b30863b8f7552f623061a256b2ba02de936ded4e 100644 --- a/src/TNL/Algorithms/Segments/CSRView.h +++ b/src/TNL/Algorithms/Segments/CSRView.h @@ -19,8 +19,11 @@ namespace TNL { namespace Algorithms { namespace Segments { +enum CSRKernelTypes { CSRScalarKernel, CSRVectorKernel, CSRLightKernel }; + template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ = CSRScalarKernel > class CSRView { public: @@ -28,12 +31,13 @@ class CSRView using DeviceType = Device; using IndexType = std::remove_const_t< Index >; using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >; - using ConstOffsetsView = typename Containers::Vector< Index, DeviceType,IndexType >::ConstViewType; + using ConstOffsetsView = typename Containers::Vector< Index, DeviceType, IndexType >::ConstViewType; using ViewType = CSRView; template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + CSRKernelTypes KernelType = KernelType_; __cuda_callable__ CSRView(); @@ -122,6 +126,23 @@ class CSRView OffsetsView offsets; }; + +template< typename Device, + typename Index > +using CSRViewScalar = CSRView< Device, Index, CSRScalarKernel >; + +template< typename Device, + typename Index > +using CSRViewVector = CSRView< Device, Index, CSRVectorKernel >; + +template< typename Device, + typename Index > +using CSRViewLight = CSRView< Device, Index, CSRLightKernel >; + +template< typename Device, + typename Index > +using CSRViewDefault = CSRViewScalar< Device, Index >; + } // namespace Segments } // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp index 5537a1233ce47638e37d91aa0ea656f6d7de8f62..7077d0f035c823bcd37517b5c03e3a4134ed62db 100644 --- a/src/TNL/Algorithms/Segments/CSRView.hpp +++ b/src/TNL/Algorithms/Segments/CSRView.hpp @@ -22,122 +22,136 @@ namespace TNL { template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: CSRView() { } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView& offsets_view ) : offsets( offsets_view ) { } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: CSRView( const OffsetsView&& offsets_view ) : offsets( offsets_view ) { } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView& csr_view ) : offsets( csr_view.offsets ) { } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: CSRView( const CSRView&& csr_view ) : offsets( std::move( csr_view.offsets ) ) { } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > String -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: getSerializationType() { return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > String -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: getSegmentsType() { return "CSR"; } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ -typename CSRView< Device, Index >::ViewType -CSRView< Device, Index >:: +typename CSRView< Device, Index, KernelType_ >::ViewType +CSRView< Device, Index, KernelType_ >:: getView() { return ViewType( this->offsets ); } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ auto -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: getConstView() const -> const ConstViewType { return ConstViewType( this->offsets.getConstView() ); } template< typename Device, - typename Index > -__cuda_callable__ auto CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +__cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentsCount() const -> IndexType { return this->offsets.getSize() - 1; } template< typename Device, - typename Index > -__cuda_callable__ auto CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +__cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); } template< typename Device, - typename Index > -__cuda_callable__ auto CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +__cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getSize() const -> IndexType { return this->getStorageSize(); } template< typename Device, - typename Index > -__cuda_callable__ auto CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +__cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getStorageSize() const -> IndexType { return details::CSR< Device, Index >::getStorageSize( this->offsets ); } template< typename Device, - typename Index > -__cuda_callable__ auto CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +__cuda_callable__ auto CSRView< Device, Index, KernelType_ >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( ! std::is_same< DeviceType, Devices::Host >::value ) @@ -152,20 +166,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > __cuda_callable__ auto -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets; @@ -181,51 +197,58 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > template< typename Function, typename... Args > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; const auto offsetsView = this->offsets.getConstView(); - auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { - const IndexType begin = offsetsView[ segmentIdx ]; - const IndexType end = offsetsView[ segmentIdx + 1 ]; - RealType aux( zero ); - IndexType localIdx( 0 ); - bool compute( true ); - for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) - aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); - keeper( segmentIdx, aux ); - }; + if( KernelType == CSRScalarKernel ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = offsetsView[ segmentIdx ]; + const IndexType end = offsetsView[ segmentIdx + 1 ]; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); + keeper( segmentIdx, aux ); + }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); } template< typename Device, - typename Index > -CSRView< Device, Index >& -CSRView< Device, Index >:: + typename Index, + CSRKernelTypes KernelType_ > +CSRView< Device, Index, KernelType_ >& +CSRView< Device, Index, KernelType_ >:: operator=( const CSRView& view ) { this->offsets.bind( view.offsets ); @@ -233,18 +256,20 @@ operator=( const CSRView& view ) } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: save( File& file ) const { file << this->offsets; } template< typename Device, - typename Index > + typename Index, + CSRKernelTypes KernelType_ > void -CSRView< Device, Index >:: +CSRView< Device, Index, KernelType_ >:: load( File& file ) { file >> this->offsets; diff --git a/src/TNL/Matrices/Legacy/CSR.h b/src/TNL/Matrices/Legacy/CSR.h index 7570eac8be54c31bf61f364abe2c5a02413b4234..42f68b1277f994197c561ef7a4d000b0e600878e 100644 --- a/src/TNL/Matrices/Legacy/CSR.h +++ b/src/TNL/Matrices/Legacy/CSR.h @@ -115,8 +115,11 @@ public: static constexpr Index THREADS_VECTOR = 128; static constexpr Index THREADS_LIGHT = 128; - /* Max length of row to process one warp */ - static constexpr Index MAX_ELEMENTS_PER_WARP = 1024; + /* Max length of row to process one warp for CSR Light, MultiVector */ + static constexpr Index MAX_ELEMENTS_PER_WARP = 384; + + /* Max length of row to process one warp for CSR Adaptive */ + static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = 512; /* How many shared memory use per block in CSR Adaptive kernel */ static constexpr Index SHARED_PER_BLOCK = 24576; diff --git a/src/TNL/Matrices/Legacy/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h index 580b63456c2071cb4b27a2c83e5d4e5737b91cd2..7a610c8257cb4450035fa0c46a928b0f84b377f5 100644 --- a/src/TNL/Matrices/Legacy/CSR_impl.h +++ b/src/TNL/Matrices/Legacy/CSR_impl.h @@ -143,7 +143,7 @@ Index findLimit(const Index start, type = Type::STREAM; return current; } else { // one long row - if (sum <= 2 * matrix.MAX_ELEMENTS_PER_WARP) + if (sum <= 2 * matrix.MAX_ELEMENTS_PER_WARP_ADAPT) type = Type::VECTOR; else type = Type::LONG; @@ -1764,8 +1764,8 @@ void SpMVCSRAdaptivePrepare( const Real *inVector, SpMVCSRAdaptive< Real, Index, warpSize, matrix.WARPS, - matrix.SHARED_PER_WARP, - matrix.MAX_ELEMENTS_PER_WARP > + matrix.SHARED_PER_WARP, + matrix.MAX_ELEMENTS_PER_WARP_ADAPT > <<>>( inVector, outVector, diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 6d068f370f3be3a2d8b6eea20386054d6c984776..581d79c983d2a0961cfe7576c07e2d2dc9d5e5f9 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -45,7 +45,7 @@ template< typename Real = double, typename Device = Devices::Host, typename Index = int, typename MatrixType = GeneralMatrix, - template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Algorithms::Segments::CSR, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Algorithms::Segments::CSRDefault, typename ComputeReal = typename ChooseSparseMatrixComputeReal< Real, Index >::type, typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > @@ -209,13 +209,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); /** -<<<<<<< HEAD * \brief Constructor with matrix rows capacities and number of columns. * -======= - * \brief Constructor with matrix rows capacities given as an initializer list and a number of columns. - * ->>>>>>> Added SparseMatrix constructor with row capacities vector. * The number of matrix rows is given by the size of \e rowCapacities list. * * \tparam ListIndex is the initializer list values type. @@ -238,9 +233,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constructor with matrix rows capacities given as a vector and number of columns. - * + * * The number of matrix rows is given by the size of \e rowCapacities vector. - * + * * \tparam RowCapacitiesVector is the row capacities vector type. Usually it is some of * \ref TNL::Containers::Array, \ref TNL::Containers::ArrayView, \ref TNL::Containers::Vector or * \ref TNL::Containers::VectorView. @@ -249,7 +244,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \param columns is the number of matrix columns. * \param realAllocator is used for allocation of matrix elements values. * \param indexAllocator is used for allocation of matrix elements column indexes. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_rowCapacities_vector.cpp * \par Output diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h index f91e471e8a2158de04f46946513e149ac530f338..a753332a9dc725f5bab475891aef113d75886e20 100644 --- a/src/TNL/Matrices/SparseMatrixView.h +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -36,10 +36,10 @@ struct ChooseSparseMatrixComputeReal< bool, Index > * * It serves as an accessor to \ref SparseMatrix for example when passing the * matrix to lambda functions. SparseMatrix view can be also created in CUDA kernels. - * - * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated + * + * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated * as binary and so the matrix elements values are not stored in the memory since we need - * to remember only coordinates of non-zero elements( which equal one). + * to remember only coordinates of non-zero elements( which equal one). * \tparam Device is a device where the matrix is allocated. * \tparam Index is a type for indexing of the matrix elements. * \tparam MatrixType specifies a symmetry of matrix. See \ref MatrixType. Symmetric @@ -50,13 +50,13 @@ struct ChooseSparseMatrixComputeReal< bool, Index > * \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack. * \tparam ComputeReal is the same as \e Real mostly but for binary matrices it is set to \e Index type. This can be changed * bu the user, of course. - * + * */ template< typename Real, typename Device = Devices::Host, typename Index = int, typename MatrixType = GeneralMatrix, - template< typename Device_, typename Index_ > class SegmentsView = Algorithms::Segments::CSRView, + template< typename Device_, typename Index_ > class SegmentsView = Algorithms::Segments::CSRViewDefault, typename ComputeReal = typename ChooseSparseMatrixComputeReal< Real, Index >::type > class SparseMatrixView : public MatrixView< Real, Device, Index > { diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index c4b409bb3a047e12410066bddbe9bdcce509ee89..033e65f63a060ee63bb52bd338ddf4862b700781 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -77,7 +77,7 @@ public: protected: // The factors L and U are stored separately and the rows of U are reversed. - Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSR > L, U; + Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U; // Specialized methods to distinguish between normal and distributed matrices // in the implementation. diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index d46f3f900f4357dd7bf15dce170e5d63ecf22497..bc6b841acce8b1acb10ee9f67712cadd351c1133 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -66,7 +66,7 @@ protected: Real tau = 1e-4; // The factors L and U are stored separately and the rows of U are reversed. - Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSR > L, U; + Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U; // Specialized methods to distinguish between normal and distributed matrices // in the implementation. diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h index 8a6e0abddda2cfcb9acd70c6b8ba350cfeb3d28e..609a6afd74cd3e37ab1856529815b79ffd3ab9cf 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index template< typename Device, typename Index, typename IndexAllocator > using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >; -using CSR_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; -using CSR_cuda = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; +using CSR_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; +using CSR_cuda = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; using E_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using E_cuda = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using SE_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h index 8f7dad73c719fd5121650a4e6170149a69075036..5a4e98915cbec11a8107194b5b3ed33ab26e4e8c 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -29,11 +29,11 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int > + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >, - TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int > + ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >, + TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int > #endif >; diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h index d86eb57f5cf6fbdaafe51734d9ea834f2bb8823e..dfdcc3b83556183e6e935b21545dbd5b2c8c3347 100644 --- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index template< typename Device, typename Index, typename IndexAllocator > using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >; -using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; -using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index c9f68b5885849209b0e5d1848a16c313b3a32fbd..826b7af6b6bcb2cf76b7f0a6f81492341bd51fbe 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index template< typename Device, typename Index, typename IndexAllocator > using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >; -using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; -using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >; +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >; using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index e090f5f62e920b2e307d551ccc2301e36027b6fb..639876875d0dac5f44550721d63407802bc46ca9 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -20,23 +20,23 @@ const char* saveAndLoadFileName = "test_SparseMatrixTest_CSR_segments"; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault > #endif >; diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h index 439fab7dfec75dcde32bc880abfe114b5a0257fd..5feb97e11cfa36adddf320cf278eb9860c36ff71 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h @@ -24,31 +24,31 @@ // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault > #ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU. - ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >, - //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR > + ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault > #endif // HAVE_CUDA >;