diff --git a/Documentation/Tutorials/Matrices/CMakeLists.txt b/Documentation/Tutorials/Matrices/CMakeLists.txt
index 0d672aa0b04f51eeda133f5b785568632dfe8b0e..94e57ec13df05c0368087e158f0d2bf199c84e27 100644
--- a/Documentation/Tutorials/Matrices/CMakeLists.txt
+++ b/Documentation/Tutorials/Matrices/CMakeLists.txt
@@ -104,9 +104,9 @@ ELSE()
 
    ####
    # THe following examples/benchmarks run for very long time
-   ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark_cuda.cpp )
-   ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark_cuda.cpp )
-   ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark_cuda.cpp )
+   ADD_EXECUTABLE( DenseMatrixSetup_Benchmark DenseMatrixSetup_Benchmark.cpp )
+   ADD_EXECUTABLE( SparseMatrixSetup_Benchmark SparseMatrixSetup_Benchmark.cpp )
+   ADD_EXECUTABLE( MultidiagonalMatrixSetup_Benchmark MultidiagonalMatrixSetup_Benchmark.cpp )
 ENDIF()
 
 IF( BUILD_CUDA )
diff --git a/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp b/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
index c53a8f5b4a9edc06b499448dcec71e91b4428529..a36e17e7b979aa859a0a319283ad06310c5f6eb8 100644
--- a/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
+++ b/Documentation/Tutorials/Matrices/SparseMatrixSetup_Benchmark.cpp
@@ -69,7 +69,7 @@ template< typename Matrix >
 void setElement_on_host_and_transfer( const int gridSize, Matrix& matrix )
 {
    using RealType = typename Matrix::RealType;
-   using HostMatrix = typename Matrix::Self< RealType, TNL::Devices::Host >;
+   using HostMatrix = typename Matrix::template Self< RealType, TNL::Devices::Host >;
 
    const int matrixSize = gridSize * gridSize;
    TNL::Containers::Vector< int, typename HostMatrix::DeviceType, int > rowCapacities( matrixSize, 5 );
diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
index 3acfb2438c33539594cb3de6aa8f4cc429d21b06..3f64bf33d59eaf271a3ca4b84c59de828a1982c6 100644
--- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
+++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h
@@ -479,7 +479,7 @@ struct LinearSolversBenchmark
                                                   DeviceType,
                                                   IndexType,
                                                   TNL::Matrices::GeneralMatrix,
-                                                  Algorithms::Segments::CSR
+                                                  Algorithms::Segments::CSRDefault
                                                 >;
          SharedPointer< CSR > matrixCopy;
          Matrices::copySparseMatrix( *matrixCopy, *matrixPointer );
diff --git a/src/Benchmarks/SpMV/spmv-legacy.h b/src/Benchmarks/SpMV/spmv-legacy.h
index ec0fd001860959efa0492e3a4c8497948ab5c010..fed37410cf4e004deec7b5a7a8ac6cb2b04ee1d7 100644
--- a/src/Benchmarks/SpMV/spmv-legacy.h
+++ b/src/Benchmarks/SpMV/spmv-legacy.h
@@ -49,7 +49,16 @@ using SlicedEllpackAlias = Matrices::Legacy::SlicedEllpack< Real, Device, Index
 
 // Segments based sparse matrix aliases
 template< typename Real, typename Device, typename Index >
-using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSR >;
+using SparseMatrix_CSR_Scalar = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRScalar >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_CSR_Vector = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRVector >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_CSR_Hybrid = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRHybrid >;
+
+template< typename Real, typename Device, typename Index >
+using SparseMatrix_CSR_Adaptive = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Algorithms::Segments::CSRAdaptive >;
 
 template< typename Device, typename Index, typename IndexAllocator >
 using EllpackSegments = Algorithms::Segments::Ellpack< Device, Index, IndexAllocator >;
@@ -309,26 +318,29 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
    benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );
 #endif
 
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar    >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector    >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light2     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light3     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light4     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light5     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light6     >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive  >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_MultiVector>( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_LightWithoutAtomic>( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_CSR                 >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::Ellpack        >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_Ellpack             >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SlicedEllpackAlias               >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack       >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_ChunkedEllpack      >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, Matrices::Legacy::BiEllpack      >( benchmark, hostOutVector, inputFileName, verboseMR );
-   benchmarkSpMV< Real, SparseMatrix_BiEllpack           >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light              >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light2             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light3             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light4             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light5             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light6             >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive           >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_MultiVector        >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrixLegacy_CSR_LightWithoutAtomic >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR_Scalar                   >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR_Vector                   >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR_Hybrid                   >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_CSR_Adaptive                 >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::Ellpack                 >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_Ellpack                      >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SlicedEllpackAlias                        >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_SlicedEllpack                >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack          >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_ChunkedEllpack               >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, Matrices::Legacy::BiEllpack               >( benchmark, hostOutVector, inputFileName, verboseMR );
+   benchmarkSpMV< Real, SparseMatrix_BiEllpack                    >( benchmark, hostOutVector, inputFileName, verboseMR );
    /* AdEllpack is broken
    benchmarkSpMV< Real, Matrices::AdEllpack              >( benchmark, hostOutVector, inputFileName, verboseMR );
     */
diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
index 82e1f12cde656caf38f45bafa09f8dd38028f126..7897073d96152552a1150c5b94ed0c60ec45d987 100644
--- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h
@@ -63,7 +63,6 @@ std::string getCurrDateTime()
    timeinfo = localtime( &rawtime );
    strftime( buffer, sizeof( buffer ), "%d-%m-%Y--%H:%M:%S", timeinfo );
    std::string curr_date_time( buffer );
-   
    return curr_date_time;
 }
 
@@ -133,8 +132,7 @@ main( int argc, char* argv[] )
 
    // prepare global metadata
    Benchmark::MetadataMap metadata = getHardwareMetadata();
-   
-   
+
    // Initiate setup of benchmarks
    if( precision == "all" || precision == "float" )
       runSpMVBenchmarks< float >( benchmark, metadata, inputFileName, verboseMR );
diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h
index 9d2b84b618f835e1578b5441fbfe9cbbdddd8033..3a04e80fd098a3990bebb396337396929c4d2cd0 100644
--- a/src/TNL/Algorithms/Segments/CSR.h
+++ b/src/TNL/Algorithms/Segments/CSR.h
@@ -22,6 +22,7 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename Kernel = CSRKernelScalar< Index, Device >,
           typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
 class CSR
 {
@@ -29,12 +30,13 @@ class CSR
 
       using DeviceType = Device;
       using IndexType = std::remove_const_t< Index >;
+      using KernelType = Kernel;
       using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >;
       using SegmentsSizes = OffsetsHolder;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = CSRView< Device_, Index_ >;
-      using ViewType = CSRView< Device, Index >;
-      using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >;
+      using ViewTemplate = CSRView< Device_, Index_, KernelType >;
+      using ViewType = CSRView< Device, Index, KernelType >;
+      using ConstViewType = CSRView< Device, std::add_const_t< IndexType >, KernelType >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       CSR();
@@ -114,8 +116,8 @@ class CSR
 
       CSR& operator=( const CSR& rhsSegments ) = default;
 
-      template< typename Device_, typename Index_, typename IndexAllocator_ >
-      CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source );
+      template< typename Device_, typename Index_, typename Kernel_, typename IndexAllocator_ >
+      CSR& operator=( const CSR< Device_, Index_, Kernel_, IndexAllocator_ >& source );
 
       void save( File& file ) const;
 
@@ -124,7 +126,36 @@ class CSR
    protected:
 
       OffsetsHolder offsets;
+
+      KernelType kernel;
 };
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+using CSRScalar = CSR< Device, Index, CSRKernelScalar< Index, Device >, IndexAllocator >;
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+using CSRVector = CSR< Device, Index, CSRKernelVector< Index, Device >, IndexAllocator >;
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+using CSRHybrid = CSR< Device, Index, CSRKernelHybrid< Index, Device >, IndexAllocator >;
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+using CSRAdaptive = CSR< Device, Index, CSRKernelAdaptive< Index, Device >, IndexAllocator >;
+
+template< typename Device,
+          typename Index,
+          typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
+using CSRDefault = CSRScalar< Device, Index, IndexAllocator >;
+
+
       } // namespace Segments
    }  // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSR.hpp b/src/TNL/Algorithms/Segments/CSR.hpp
index a6b915db343ba840e5396796531f4af36ae67071..d6a177f3be5206301b388575346a95bb04d76393 100644
--- a/src/TNL/Algorithms/Segments/CSR.hpp
+++ b/src/TNL/Algorithms/Segments/CSR.hpp
@@ -22,16 +22,18 @@ namespace TNL {
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 CSR()
 {
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 CSR( const SegmentsSizes& segmentsSizes )
 {
    this->setSegmentsSizes( segmentsSizes );
@@ -39,36 +41,42 @@ CSR( const SegmentsSizes& segmentsSizes )
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-CSR< Device, Index, IndexAllocator >::
-CSR( const CSR& csr ) : offsets( csr.offsets )
+CSR< Device, Index, Kernel, IndexAllocator >::
+CSR( const CSR& csr ) : offsets( csr.offsets ), kernel( csr.kernel )
 {
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-CSR< Device, Index, IndexAllocator >::
-CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) )
+CSR< Device, Index, Kernel, IndexAllocator >::
+CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ), kernel( std::move( csr.kernel ) )
 {
 
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 String
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 getSerializationType()
 {
-   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+   return "CSR< [any_device], " +
+      TNL::getSerializationType< IndexType >() +
+      TNL::getSerializationType< KernelType >() + " >";
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 String
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 getSegmentsType()
 {
    return ViewType::getSegmentsType();
@@ -76,51 +84,58 @@ getSegmentsType()
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
    template< typename SizesHolder >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 setSegmentsSizes( const SizesHolder& sizes )
 {
    details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets );
+   this->kernel.init( this->offsets );
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 reset()
 {
    this->offsets.setSize( 1 );
    this->offsets = 0;
+   this->kernel.reset();
 }
 
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-typename CSR< Device, Index, IndexAllocator >::ViewType
-CSR< Device, Index, IndexAllocator >::
+typename CSR< Device, Index, Kernel, IndexAllocator >::ViewType
+CSR< Device, Index, Kernel, IndexAllocator >::
 getView()
 {
-   return ViewType( this->offsets.getView() );
+   return ViewType( this->offsets.getView(), this->kernel.getView() );
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 auto
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( this->offsets.getConstView() );
+   return ConstViewType( this->offsets.getConstView(), this->kernel.getConstView() );
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
+__cuda_callable__ auto CSR< Device, Index, Kernel, IndexAllocator >::
 getSegmentsCount() const -> IndexType
 {
    return this->offsets.getSize() - 1;
@@ -128,8 +143,9 @@ getSegmentsCount() const -> IndexType
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
+__cuda_callable__ auto CSR< Device, Index, Kernel, IndexAllocator >::
 getSegmentSize( const IndexType segmentIdx ) const -> IndexType
 {
    return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
@@ -137,8 +153,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
+__cuda_callable__ auto CSR< Device, Index, Kernel, IndexAllocator >::
 getSize() const -> IndexType
 {
    return this->getStorageSize();
@@ -146,8 +163,9 @@ getSize() const -> IndexType
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
+__cuda_callable__ auto CSR< Device, Index, Kernel, IndexAllocator >::
 getStorageSize() const -> IndexType
 {
    return details::CSR< Device, Index >::getStorageSize( this->offsets );
@@ -155,8 +173,9 @@ getStorageSize() const -> IndexType
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-__cuda_callable__ auto CSR< Device, Index, IndexAllocator >::
+__cuda_callable__ auto CSR< Device, Index, Kernel, IndexAllocator >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
 {
    if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -172,10 +191,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 __cuda_callable__
 auto
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] );
@@ -183,10 +203,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
    template< typename Function, typename... Args >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    this->getConstView().forSegments( first, last, f, args... );
@@ -194,10 +215,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator>
    template< typename Function, typename... Args >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSegmentsCount(), f, args... );
@@ -205,10 +227,11 @@ forAll( Function& f, Args... args ) const
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... );
@@ -216,10 +239,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
@@ -227,21 +251,24 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-   template< typename Device_, typename Index_, typename IndexAllocator_ >
-CSR< Device, Index, IndexAllocator >&
-CSR< Device, Index, IndexAllocator >::
-operator=( const CSR< Device_, Index_, IndexAllocator_ >& source )
+   template< typename Device_, typename Index_, typename Kernel_, typename IndexAllocator_ >
+CSR< Device, Index, Kernel, IndexAllocator >&
+CSR< Device, Index, Kernel, IndexAllocator >::
+operator=( const CSR< Device_, Index_, Kernel_, IndexAllocator_ >& source )
 {
    this->offsets = source.offsets;
+   this->kernel = kernel;
    return *this;
 }
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
 void
-CSR< Device, Index, IndexAllocator >::
+CSR< Device, Index, Kernel, IndexAllocator >::
 save( File& file ) const
 {
    file << this->offsets;
@@ -249,12 +276,14 @@ save( File& file ) const
 
 template< typename Device,
           typename Index,
+          typename Kernel,
           typename IndexAllocator >
-void 
-CSR< Device, Index, IndexAllocator >::
+void
+CSR< Device, Index, Kernel, IndexAllocator >::
 load( File& file )
 {
    file >> this->offsets;
+   this->kernel.init( this->offsets );
 }
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
new file mode 100644
index 0000000000000000000000000000000000000000..feed58a58792c0ba01c4c78040758fd21b9f5eed
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelAdaptive.h
@@ -0,0 +1,487 @@
+/***************************************************************************
+                          CSRKernels.h -  description
+                             -------------------
+    begin                : Jan 20, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+#include <TNL/Algorithms/Segments/CSRKernelScalar.h>
+#include <TNL/Algorithms/Segments/details/CSRAdaptiveKernelBlockDescriptor.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+#ifdef HAVE_CUDA
+
+template< int CudaBlockSize,
+          int warpSize,
+          int WARPS,
+          int SHARED_PER_WARP,
+          int MAX_ELEM_PER_WARP,
+          typename BlocksView,
+          typename Offsets,
+          typename Index,
+          typename Fetch,
+          typename Reduction,
+          typename ResultKeeper,
+          typename Real,
+          typename... Args >
+__global__ void
+segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
+                                    int gridIdx,
+                                    Offsets offsets,
+                                    Index first,
+                                    Index last,
+                                    Fetch fetch,
+                                    Reduction reduce,
+                                    ResultKeeper keep,
+                                    Real zero,
+                                    Args... args )
+{
+   __shared__ Real streamShared[ WARPS ][ SHARED_PER_WARP ];
+   __shared__ Real multivectorShared[ CudaBlockSize / warpSize ];
+   constexpr size_t MAX_X_DIM = 2147483647;
+   const Index index = (gridIdx * MAX_X_DIM) + (blockIdx.x * blockDim.x) + threadIdx.x;
+   const Index blockIdx = index / warpSize;
+   if( blockIdx >= blocks.getSize() - 1 )
+      return;
+
+   if( threadIdx.x < CudaBlockSize / warpSize )
+      multivectorShared[ threadIdx.x ] = zero;
+   Real result = zero;
+   bool compute( true );
+   const Index laneIdx = threadIdx.x & 31; // & is cheaper than %
+   const details::CSRAdaptiveKernelBlockDescriptor< Index > block = blocks[ blockIdx ];
+   const Index& firstSegmentIdx = block.getFirstSegment();
+   const Index begin = offsets[ firstSegmentIdx ];
+
+   const auto blockType = block.getType();
+   if( blockType == details::Type::STREAM ) // Stream kernel - many short segments per warp
+   {
+      const Index warpIdx = threadIdx.x / 32;
+      const Index end = begin + block.getSize();
+
+      // Stream data to shared memory
+      for( Index globalIdx = laneIdx + begin; globalIdx < end; globalIdx += warpSize )
+      {
+         streamShared[ warpIdx ][ globalIdx - begin ] = //fetch( globalIdx, compute );
+            details::FetchLambdaAdapter< Index, Fetch >::call( fetch, -1, -1, globalIdx, compute );
+         // TODO:: fix this by template specialization so that we can assume fetch lambda
+         // with short parameters
+      }
+
+      const Index lastSegmentIdx = firstSegmentIdx + block.getSegmentsInBlock();
+
+      for( Index i = firstSegmentIdx + laneIdx; i < lastSegmentIdx; i += warpSize )
+      {
+         const Index sharedEnd = offsets[ i + 1 ] - begin; // end of preprocessed data
+         result = zero;
+         // Scalar reduction
+         for( Index sharedIdx = offsets[ i ] - begin; sharedIdx < sharedEnd; sharedIdx++ )
+            result = reduce( result, streamShared[ warpIdx ][ sharedIdx ] );
+         keep( i, result );
+      }
+   }
+   else if( blockType == details::Type::VECTOR ) // Vector kernel - one segment per warp
+   {
+      const Index end = begin + block.getSize();
+      const Index segmentIdx = block.getFirstSegment();
+
+      for( Index globalIdx = begin + laneIdx; globalIdx < end; globalIdx += warpSize )
+         result = reduce( result, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, -1, globalIdx, compute ) ); // fix local idx
+
+      // Parallel reduction
+      result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result, 16 ) );
+      result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result,  8 ) );
+      result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result,  4 ) );
+      result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result,  2 ) );
+      result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result,  1 ) );
+      if( laneIdx == 0 )
+         keep( segmentIdx, result );
+   }
+   else // blockType == Type::LONG - several warps per segment
+   {
+      // Number of elements processed by previous warps
+      //const Index offset = //block.index[1] * MAX_ELEM_PER_WARP;
+      ///   block.getWarpIdx() * MAX_ELEM_PER_WARP;
+      //Index to = begin + (block.getWarpIdx()  + 1) * MAX_ELEM_PER_WARP;
+      const Index segmentIdx = block.getFirstSegment();//block.index[0];
+      //minID = offsets[block.index[0] ];
+      const Index end = offsets[segmentIdx + 1];
+      const int tid = threadIdx.x;
+      //const int inBlockWarpIdx = block.getWarpIdx();
+
+      //if( to > end )
+      //   to = end;
+      TNL_ASSERT_GT( block.getWarpsCount(), 0, "" );
+      result = zero;
+      //printf( "LONG tid %d warpIdx %d: LONG \n", tid, block.getWarpIdx()  );
+      for( Index globalIdx = begin + laneIdx + TNL::Cuda::getWarpSize() * block.getWarpIdx();
+           globalIdx < end;
+           globalIdx += TNL::Cuda::getWarpSize() * block.getWarpsCount() )
+      {
+         result = reduce( result, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, -1, globalIdx, compute ) );
+         //if( laneIdx == 0 )
+         //   printf( "LONG warpIdx: %d gid: %d begin: %d end: %d -> %d \n", ( int ) block.getWarpIdx(), globalIdx, begin, end,
+         //    details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, 0, globalIdx, compute ) );
+         //result += values[i] * inVector[columnIndexes[i]];
+      }
+      //printf( "tid %d -> %d \n", tid, result );
+
+      result += __shfl_down_sync(0xFFFFFFFF, result, 16);
+      result += __shfl_down_sync(0xFFFFFFFF, result, 8);
+      result += __shfl_down_sync(0xFFFFFFFF, result, 4);
+      result += __shfl_down_sync(0xFFFFFFFF, result, 2);
+      result += __shfl_down_sync(0xFFFFFFFF, result, 1);
+
+      //if( laneIdx == 0 )
+      //   printf( "WARP RESULT: tid %d -> %d \n", tid, result );
+
+      const Index warpID = threadIdx.x / 32;
+      if( laneIdx == 0 )
+         multivectorShared[ warpID ] = result;
+
+      __syncthreads();
+      // Reduction in multivectorShared
+      if( block.getWarpIdx() == 0 && laneIdx < 16 )
+      {
+         constexpr int totalWarps = CudaBlockSize / warpSize;
+         if( totalWarps >= 32 )
+         {
+            multivectorShared[ laneIdx ] =  reduce( multivectorShared[ laneIdx ], multivectorShared[ laneIdx + 16 ] );
+            __syncwarp();
+         }
+         if( totalWarps >= 16 )
+         {
+            multivectorShared[ laneIdx ] =  reduce( multivectorShared[ laneIdx ], multivectorShared[ laneIdx +  8 ] );
+            __syncwarp();
+         }
+         if( totalWarps >= 8 )
+         {
+            multivectorShared[ laneIdx ] =  reduce( multivectorShared[ laneIdx ], multivectorShared[ laneIdx +  4 ] );
+            __syncwarp();
+         }
+         if( totalWarps >= 4 )
+         {
+            multivectorShared[ laneIdx ] =  reduce( multivectorShared[ laneIdx ], multivectorShared[ laneIdx +  2 ] );
+            __syncwarp();
+         }
+         if( totalWarps >= 2 )
+         {
+            multivectorShared[ laneIdx ] =  reduce( multivectorShared[ laneIdx ], multivectorShared[ laneIdx +  1 ] );
+            __syncwarp();
+         }
+         if( laneIdx == 0 )
+         {
+            //printf( "Long: segmentIdx %d -> %d \n", segmentIdx, multivectorShared[ 0 ] );
+            keep( segmentIdx, multivectorShared[ 0 ] );
+         }
+      }
+   }
+}
+#endif
+
+
+template< typename Index,
+          typename Device >
+struct CSRKernelAdaptiveView
+{
+   using IndexType = Index;
+   using DeviceType = Device;
+   using ViewType = CSRKernelAdaptiveView< Index, Device >;
+   using ConstViewType = CSRKernelAdaptiveView< Index, Device >;
+   using BlocksType = TNL::Containers::Vector< details::CSRAdaptiveKernelBlockDescriptor< Index >, Device, Index >;
+   using BlocksView = typename BlocksType::ViewType;
+
+   CSRKernelAdaptiveView() = default;
+
+   CSRKernelAdaptiveView( BlocksType& blocks )
+   {
+      this->blocks.bind( blocks );
+   };
+
+   void setBlocks( BlocksType& blocks )
+   {
+      this->blocks.bind( blocks );
+   }
+
+   ViewType getView() { return *this; };
+
+   ConstViewType getConstView() const { return *this; };
+
+   template< typename OffsetsView,
+             typename Fetch,
+             typename Reduction,
+             typename ResultKeeper,
+             typename Real,
+             typename... Args >
+   void segmentsReduction( const OffsetsView& offsets,
+                        Index first,
+                        Index last,
+                        Fetch& fetch,
+                        const Reduction& reduction,
+                        ResultKeeper& keeper,
+                        const Real& zero,
+                        Args... args ) const
+   {
+#ifdef HAVE_CUDA
+      if( details::CheckFetchLambda< Index, Fetch >::hasAllParameters() )
+      {
+         TNL::Algorithms::Segments::CSRKernelScalar< Index, Device >::
+            segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+         return;
+      }
+
+      static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
+      //static constexpr Index THREADS_SCALAR = 128;
+      //static constexpr Index THREADS_VECTOR = 128;
+      //static constexpr Index THREADS_LIGHT = 128;
+
+      /* Max length of row to process one warp for CSR Light, MultiVector */
+      //static constexpr Index MAX_ELEMENTS_PER_WARP = 384;
+
+      /* Max length of row to process one warp for CSR Adaptive */
+      static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = 512;
+
+      /* How many shared memory use per block in CSR Adaptive kernel */
+      static constexpr Index SHARED_PER_BLOCK = 24576;
+
+      /* Number of elements in shared memory */
+      static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(Real);
+
+      /* Number of warps in block for CSR Adaptive */
+      static constexpr Index WARPS = THREADS_ADAPTIVE / 32;
+
+      /* Number of elements in shared memory per one warp */
+      static constexpr Index SHARED_PER_WARP = SHARED / WARPS;
+
+      constexpr int warpSize = 32;
+
+      Index blocksCount;
+
+      const Index threads = THREADS_ADAPTIVE;
+      constexpr size_t MAX_X_DIM = 2147483647;
+
+      /* Fill blocks */
+      size_t neededThreads = this->blocks.getSize() * warpSize; // one warp per block
+      /* Execute kernels on device */
+      for (Index gridIdx = 0; neededThreads != 0; gridIdx++ )
+      {
+         if (MAX_X_DIM * threads >= neededThreads)
+         {
+            blocksCount = roundUpDivision(neededThreads, threads);
+            neededThreads = 0;
+         }
+         else
+         {
+            blocksCount = MAX_X_DIM;
+            neededThreads -= MAX_X_DIM * threads;
+         }
+
+         segmentsReductionCSRAdaptiveKernel<
+               THREADS_ADAPTIVE,
+               warpSize,
+               WARPS,
+               SHARED_PER_WARP,
+               MAX_ELEMENTS_PER_WARP_ADAPT,
+               BlocksView,
+               OffsetsView,
+               Index, Fetch, Reduction, ResultKeeper, Real, Args... >
+            <<<blocksCount, threads>>>(
+               this->blocks,
+               gridIdx,
+               offsets,
+               first,
+               last,
+               fetch,
+               reduction,
+               keeper,
+               zero,
+               args... );
+      }
+#endif
+   }
+
+   CSRKernelAdaptiveView& operator=( const CSRKernelAdaptiveView< Index, Device >& kernelView )
+   {
+      this->blocks.bind( kernelView.blocks );
+      return *this;
+   }
+
+   void printBlocks() const
+   {
+      for( Index i = 0; i < this->blocks.getSize(); i++ )
+      {
+         auto block = blocks.getElement( i );
+         std::cout << "Block " << i << " : " << block << std::endl;
+      }
+
+   }
+
+   protected:
+      BlocksView blocks;
+};
+
+template< typename Index,
+          typename Device >
+struct CSRKernelAdaptive
+{
+    using IndexType = Index;
+    using DeviceType = Device;
+    using ViewType = CSRKernelAdaptiveView< Index, Device >;
+    using ConstViewType = CSRKernelAdaptiveView< Index, Device >;
+    using BlocksType = typename ViewType::BlocksType;
+    using BlocksView = typename BlocksType::ViewType;
+
+
+    static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
+
+   /* How many shared memory use per block in CSR Adaptive kernel */
+   static constexpr Index SHARED_PER_BLOCK = 20000; //24576; TODO:
+
+   /* Number of elements in shared memory */
+   static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(double);
+
+   /* Number of warps in block for CSR Adaptive */
+   static constexpr Index WARPS = THREADS_ADAPTIVE / 32;
+
+   /* Number of elements in shared memory per one warp */
+   static constexpr Index SHARED_PER_WARP = SHARED / WARPS;
+
+   /* Max length of row to process one warp for CSR Light, MultiVector */
+   static constexpr Index MAX_ELEMENTS_PER_WARP = 384;
+
+   /* Max length of row to process one warp for CSR Adaptive */
+   static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = 512;
+
+   template< typename Offsets >
+   Index findLimit( const Index start,
+                    const Offsets& offsets,
+                    const Index size,
+                    details::Type &type,
+                    Index &sum )
+   {
+      sum = 0;
+      for (Index current = start; current < size - 1; current++ )
+      {
+         Index elements = offsets.getElement(current + 1) -
+                           offsets.getElement(current);
+         sum += elements;
+         if( sum > SHARED_PER_WARP )
+         {
+            if( current - start > 0 ) // extra row
+            {
+               type = details::Type::STREAM;
+               return current;
+            }
+            else
+            {                  // one long row
+               if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT )
+                  type = details::Type::VECTOR;
+               else
+                  type = details::Type::LONG;
+                  //type = Type::LONG; //
+               return current + 1;
+            }
+         }
+      }
+      type = details::Type::STREAM;
+      return size - 1; // return last row pointer
+    }
+
+    template< typename Offsets >
+    void init( const Offsets& offsets )
+    {
+        const Index rows = offsets.getSize();
+        Index sum, start( 0 ), nextStart( 0 );
+
+        // Fill blocks
+        std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock;
+        inBlock.reserve( rows );
+
+        while( nextStart != rows - 1 )
+        {
+            details::Type type;
+            nextStart = findLimit( start, offsets, rows, type, sum );
+
+            if( type == details::Type::LONG )
+            {
+               const Index blocksCount = inBlock.size();
+               const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
+               Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
+               if( warpsLeft == 0 )
+                  warpsLeft = warpsPerCudaBlock;
+               //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP);
+               inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft );
+               for( Index index = 1; index < warpsLeft; index++ )
+               {
+                  inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft );
+               }
+            }
+            else
+            {
+               inBlock.emplace_back(start, type,
+                    nextStart,
+                    offsets.getElement(nextStart),
+                    offsets.getElement(start) );
+            }
+            start = nextStart;
+        }
+        inBlock.emplace_back(nextStart);
+
+        // Copy values
+        this->blocks.setSize(inBlock.size());
+        for (size_t i = 0; i < inBlock.size(); ++i)
+            this->blocks.setElement(i, inBlock[i]);
+
+         this->view.setBlocks( blocks );
+    };
+
+   void reset()
+   {
+      this->blocks.reset();
+      this->view.setBlocks( blocks );
+   }
+
+   ViewType getView() { return this->view; };
+
+   ConstViewType getConstView() const { return this->view; };
+
+   template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+   void segmentsReduction( const OffsetsView& offsets,
+                        Index first,
+                        Index last,
+                        Fetch& fetch,
+                        const Reduction& reduction,
+                        ResultKeeper& keeper,
+                        const Real& zero,
+                        Args... args ) const
+   {
+      view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+   }
+
+   protected:
+      BlocksType blocks;
+
+      ViewType view;
+};
+
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.h b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h
new file mode 100644
index 0000000000000000000000000000000000000000..92a4a54ee692f620a8b9983a5b637c25cfd1a09d
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.h
@@ -0,0 +1,65 @@
+/***************************************************************************
+                          CSRKernelHybrid.h -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+template< typename Index,
+          typename Device >
+struct CSRKernelHybrid
+{
+   using IndexType = Index;
+   using DeviceType = Device;
+   using ViewType = CSRKernelHybrid< Index, Device >;
+   using ConstViewType = CSRKernelHybrid< Index, Device >;
+
+   template< typename Offsets >
+   void init( const Offsets& offsets );
+
+   void reset();
+
+   ViewType getView();
+
+   ConstViewType getConstView() const;
+
+
+   template< typename OffsetsView,
+             typename Fetch,
+             typename Reduction,
+             typename ResultKeeper,
+             typename Real,
+             typename... Args >
+   void segmentsReduction( const OffsetsView& offsets,
+                                  Index first,
+                                  Index last,
+                                  Fetch& fetch,
+                                  const Reduction& reduction,
+                                  ResultKeeper& keeper,
+                                  const Real& zero,
+                                  Args... args ) const;
+
+   protected:
+      int threadsPerSegment;
+};
+
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
+
+#include <TNL/Algorithms/Segments/CSRKernelHybrid.hpp>
diff --git a/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..06d2d2868d04f0b8562a991b34fa91539cb43d03
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelHybrid.hpp
@@ -0,0 +1,195 @@
+/***************************************************************************
+                          CSRKernelHybrid.hpp -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+#include <TNL/Algorithms/Segments/CSRKernelHybrid.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+#ifdef HAVE_CUDA
+template< int ThreadsPerSegment,
+          typename Offsets,
+          typename Index,
+          typename Fetch,
+          typename Reduction,
+          typename ResultKeeper,
+          typename Real,
+          typename... Args >
+__global__
+void segmentsReductionCSRHybridKernel(
+    int gridIdx,
+    const Offsets offsets,
+    Index first,
+    Index last,
+    Fetch fetch,
+    const Reduction reduce,
+    ResultKeeper keep,
+    const Real zero,
+    Args... args )
+{
+    /***
+     * We map one warp to each segment
+     */
+    const Index segmentIdx =  TNL::Cuda::getGlobalThreadIdx( gridIdx ) / ThreadsPerSegment + first;
+    if( segmentIdx >= last )
+        return;
+
+    const int laneIdx = threadIdx.x & ( ThreadsPerSegment - 1 ); // & is cheaper than %
+    Index endIdx = offsets[ segmentIdx + 1] ;
+
+    Index localIdx( laneIdx );
+    Real aux = zero;
+    bool compute( true );
+    for( Index globalIdx = offsets[ segmentIdx ] + localIdx; globalIdx < endIdx; globalIdx += ThreadsPerSegment )
+    {
+      aux = reduce( aux, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
+      localIdx += TNL::Cuda::getWarpSize();
+    }
+
+    /****
+     * Reduction in each segment.
+     */
+    if( ThreadsPerSegment == 32 )
+        aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux, 16 ) );
+    if( ThreadsPerSegment >= 16 )
+        aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  8 ) );
+    if( ThreadsPerSegment >= 8 )
+        aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  4 ) );
+    if( ThreadsPerSegment >= 4 )
+        aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  2 ) );
+    if( ThreadsPerSegment >= 2 )
+        aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  1 ) );
+
+    if( laneIdx == 0 )
+        keep( segmentIdx, aux );
+}
+#endif
+
+
+
+template< typename Index,
+          typename Device >
+    template< typename Offsets >
+void
+CSRKernelHybrid< Index, Device >::
+init( const Offsets& offsets )
+{
+    const Index segmentsCount = offsets.getSize() - 1;
+    const Index elementsInSegment = std::ceil( ( double ) offsets.getElement( segmentsCount ) / ( double ) segmentsCount );
+    this->threadsPerSegment = TNL::min( std::pow( 2, std::ceil( std::log2( elementsInSegment ) ) ), TNL::Cuda::getWarpSize() );
+    TNL_ASSERT_GE( threadsPerSegment, 0, "" );
+    TNL_ASSERT_LE( threadsPerSegment, 32, "" );
+}
+
+template< typename Index,
+          typename Device >
+void
+CSRKernelHybrid< Index, Device >::
+reset()
+{
+    this->threadsPerSegment = 0;
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelHybrid< Index, Device >::
+getView() -> ViewType
+{
+    return *this;
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelHybrid< Index, Device >::
+getConstView() const -> ConstViewType
+{
+    return *this;
+};
+
+
+template< typename Index,
+          typename Device >
+    template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+void
+CSRKernelHybrid< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                         Index first,
+                         Index last,
+                         Fetch& fetch,
+                         const Reduction& reduction,
+                         ResultKeeper& keeper,
+                         const Real& zero,
+                         Args... args ) const
+{
+    TNL_ASSERT_GE( this->threadsPerSegment, 0, "" );
+    TNL_ASSERT_LE( this->threadsPerSegment, 32, "" );
+
+#ifdef HAVE_CUDA
+    const size_t threadsCount = this->threadsPerSegment * ( last - first );
+    dim3 blocksCount, gridsCount, blockSize( 256 );
+    TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount );
+    //std::cerr << " this->threadsPerSegment = " << this->threadsPerSegment << " offsets = " << offsets << std::endl;
+    for( unsigned int gridIdx = 0; gridIdx < gridsCount.x; gridIdx ++ )
+    {
+        dim3 gridSize;
+        TNL::Cuda::setupGrid( blocksCount, gridsCount, gridIdx, gridSize );
+        switch( this->threadsPerSegment )
+        {
+            case 0:      // this means zero/empty matrix
+                break;
+            case 1:
+                segmentsReductionCSRHybridKernel<  1, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            case 2:
+                segmentsReductionCSRHybridKernel<  2, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            case 4:
+                segmentsReductionCSRHybridKernel<  4, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            case 8:
+                segmentsReductionCSRHybridKernel<  8, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            case 16:
+                segmentsReductionCSRHybridKernel< 16, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            case 32:
+                segmentsReductionCSRHybridKernel< 32, OffsetsView, Index, Fetch, Reduction, ResultKeeper, Real, Args... ><<< gridSize, blockSize >>>(
+                    gridIdx, offsets, first, last, fetch, reduction, keeper, zero, args... );
+                    break;
+            default:
+                throw std::runtime_error( std::string( "Wrong value of threadsPerSegment: " ) + std::to_string( this->threadsPerSegment ) );
+        }
+    }
+#endif
+}
+
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.h b/src/TNL/Algorithms/Segments/CSRKernelScalar.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a716c890d07b9fd797235540ee89dd85de86196
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.h
@@ -0,0 +1,61 @@
+/***************************************************************************
+                          CSRKernelScalar.h -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+template< typename Index,
+          typename Device >
+struct CSRKernelScalar
+{
+    using IndexType = Index;
+    using DeviceType = Device;
+    using ViewType = CSRKernelScalar< Index, Device >;
+    using ConstViewType = CSRKernelScalar< Index, Device >;
+
+    template< typename Offsets >
+    void init( const Offsets& offsets );
+
+    void reset();
+
+    ViewType getView();
+
+    ConstViewType getConstView() const;
+
+    template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+    static void segmentsReduction( const OffsetsView& offsets,
+                               Index first,
+                               Index last,
+                               Fetch& fetch,
+                               const Reduction& reduction,
+                               ResultKeeper& keeper,
+                               const Real& zero,
+                               Args... args );
+};
+
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
+
+#include <TNL/Algorithms/Segments/CSRKernelScalar.hpp>
\ No newline at end of file
diff --git a/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7dd0f5cd7b87cc213585ad955e91e4b363ee3a9c
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelScalar.hpp
@@ -0,0 +1,92 @@
+/***************************************************************************
+                          CSRKernelScalar.h -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/CSRKernelScalar.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+template< typename Index,
+          typename Device >
+    template< typename Offsets >
+void
+CSRKernelScalar< Index, Device >::
+init( const Offsets& offsets )
+{
+}
+
+template< typename Index,
+          typename Device >
+void
+CSRKernelScalar< Index, Device >::
+reset()
+{
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelScalar< Index, Device >::
+getView() -> ViewType
+{
+    return *this;
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelScalar< Index, Device >::
+getConstView() const -> ConstViewType
+{
+    return *this;
+};
+
+template< typename Index,
+          typename Device >
+    template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+void
+CSRKernelScalar< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                   Index first,
+                   Index last,
+                   Fetch& fetch,
+                   const Reduction& reduction,
+                   ResultKeeper& keeper,
+                   const Real& zero,
+                   Args... args )
+{
+    auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
+        const IndexType begin = offsets[ segmentIdx ];
+        const IndexType end = offsets[ segmentIdx + 1 ];
+        Real aux( zero );
+        IndexType localIdx( 0 );
+        bool compute( true );
+        for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
+            aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
+        keeper( segmentIdx, aux );
+    };
+    Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+}
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.h b/src/TNL/Algorithms/Segments/CSRKernelVector.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a6ccf7ff71cea21829121450dd2e09fbbc68b0f
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelVector.h
@@ -0,0 +1,62 @@
+/***************************************************************************
+                          CSRKernelVector.h -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+template< typename Index,
+          typename Device >
+struct CSRKernelVector
+{
+    using IndexType = Index;
+    using DeviceType = Device;
+    using ViewType = CSRKernelVector< Index, Device >;
+    using ConstViewType = CSRKernelVector< Index, Device >;
+
+    template< typename Offsets >
+    void init( const Offsets& offsets );
+
+    void reset();
+
+    ViewType getView();
+
+    ConstViewType getConstView() const;
+
+
+    template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+    static void segmentsReduction( const OffsetsView& offsets,
+                                   Index first,
+                                   Index last,
+                                   Fetch& fetch,
+                                   const Reduction& reduction,
+                                   ResultKeeper& keeper,
+                                   const Real& zero,
+                                   Args... args );
+};
+
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
+
+#include <TNL/Algorithms/Segments/CSRKernelVector.hpp>
diff --git a/src/TNL/Algorithms/Segments/CSRKernelVector.hpp b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d6f5bb7ec321b25ef89a4654197d86c985123706
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/CSRKernelVector.hpp
@@ -0,0 +1,152 @@
+/***************************************************************************
+                          CSRKernelVector.hpp -  description
+                             -------------------
+    begin                : Jan 23, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Cuda/LaunchHelpers.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Algorithms/Segments/details/LambdaAdapter.h>
+#include <TNL/Algorithms/Segments/CSRKernelVector.h>
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+
+#ifdef HAVE_CUDA
+template< typename Offsets,
+          typename Index,
+          typename Fetch,
+          typename Reduction,
+          typename ResultKeeper,
+          typename Real,
+          typename... Args >
+__global__
+void segmentsReductionCSRKernelVector(
+    int gridIdx,
+    const Offsets offsets,
+    Index first,
+    Index last,
+    Fetch fetch,
+    const Reduction reduce,
+    ResultKeeper keep,
+    const Real zero,
+    Args... args )
+{
+    /***
+     * We map one warp to each segment
+     */
+    const Index segmentIdx =  TNL::Cuda::getGlobalThreadIdx( gridIdx ) / TNL::Cuda::getWarpSize() + first;
+    if( segmentIdx >= last )
+        return;
+
+    const int laneIdx = threadIdx.x & ( TNL::Cuda::getWarpSize() - 1 ); // & is cheaper than %
+    TNL_ASSERT_LT( segmentIdx + 1, offsets.getSize(), "" );
+    Index endIdx = offsets[ segmentIdx + 1 ];
+
+    Index localIdx( laneIdx );
+    Real aux = zero;
+    bool compute( true );
+    for( Index globalIdx = offsets[ segmentIdx ] + localIdx; globalIdx < endIdx; globalIdx += TNL::Cuda::getWarpSize() )
+    {
+        TNL_ASSERT_LT( globalIdx, endIdx, "" );
+        aux = reduce( aux, details::FetchLambdaAdapter< Index, Fetch >::call( fetch, segmentIdx, localIdx, globalIdx, compute ) );
+        localIdx += TNL::Cuda::getWarpSize();
+    }
+
+   /****
+    * Reduction in each warp which means in each segment.
+    */
+   aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux, 16 ) );
+   aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  8 ) );
+   aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  4 ) );
+   aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  2 ) );
+   aux = reduce( aux, __shfl_down_sync( 0xFFFFFFFF, aux,  1 ) );
+
+   if( laneIdx == 0 )
+     keep( segmentIdx, aux );
+}
+#endif
+
+template< typename Index,
+          typename Device >
+    template< typename Offsets >
+void
+CSRKernelVector< Index, Device >::
+init( const Offsets& offsets )
+{
+}
+
+template< typename Index,
+          typename Device >
+void
+CSRKernelVector< Index, Device >::
+reset()
+{
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelVector< Index, Device >::
+getView() -> ViewType
+{
+    return *this;
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRKernelVector< Index, Device >::
+getConstView() const -> ConstViewType
+{
+    return *this;
+};
+
+
+template< typename Index,
+          typename Device >
+    template< typename OffsetsView,
+              typename Fetch,
+              typename Reduction,
+              typename ResultKeeper,
+              typename Real,
+              typename... Args >
+void
+CSRKernelVector< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                         Index first,
+                         Index last,
+                         Fetch& fetch,
+                         const Reduction& reduction,
+                         ResultKeeper& keeper,
+                         const Real& zero,
+                         Args... args )
+{
+#ifdef HAVE_CUDA
+    const Index warpsCount = last - first;
+    const size_t threadsCount = warpsCount * TNL::Cuda::getWarpSize();
+    dim3 blocksCount, gridsCount, blockSize( 256 );
+    TNL::Cuda::setupThreads( blockSize, blocksCount, gridsCount, threadsCount );
+    dim3 gridIdx;
+    for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x ++ )
+    {
+        dim3 gridSize;
+        TNL::Cuda::setupGrid( blocksCount, gridsCount, gridIdx, gridSize );
+        segmentsReductionCSRKernelVector< OffsetsView, IndexType, Fetch, Reduction, ResultKeeper, Real, Args... >
+        <<< gridSize, blockSize >>>(
+            gridIdx.x, offsets, first, last, fetch, reduction, keeper, zero, args... );
+    };
+#endif
+}
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h
index 610864f5e13921e6d81b775ebf7b233ea8b6b3e9..4576d9fdb5efbb527fb8f35d77b29b168b6a0978 100644
--- a/src/TNL/Algorithms/Segments/CSRView.h
+++ b/src/TNL/Algorithms/Segments/CSRView.h
@@ -14,35 +14,42 @@
 
 #include <TNL/Containers/Vector.h>
 #include <TNL/Algorithms/Segments/SegmentView.h>
+#include <TNL/Algorithms/Segments/CSRKernelScalar.h>
+#include <TNL/Algorithms/Segments/CSRKernelVector.h>
+#include <TNL/Algorithms/Segments/CSRKernelHybrid.h>
+#include <TNL/Algorithms/Segments/CSRKernelAdaptive.h>
 
 namespace TNL {
    namespace Algorithms {
       namespace Segments {
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel = CSRKernelScalar< Index, Device > >
 class CSRView
 {
    public:
 
       using DeviceType = Device;
       using IndexType = std::remove_const_t< Index >;
+      using KernelType = Kernel;
       using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >;
-      using ConstOffsetsView = typename Containers::Vector< Index, DeviceType,IndexType >::ConstViewType;
+      using ConstOffsetsView = typename Containers::Vector< Index, DeviceType, IndexType >::ConstViewType;
+      using KernelView = typename Kernel::ViewType;
       using ViewType = CSRView;
       template< typename Device_, typename Index_ >
-      using ViewTemplate = CSRView< Device_, Index_ >;
-      using ConstViewType = CSRView< Device, std::add_const_t< Index > >;
+      using ViewTemplate = CSRView< Device_, Index_, Kernel >;
+      using ConstViewType = CSRView< Device, std::add_const_t< Index >, Kernel >;
       using SegmentViewType = SegmentView< IndexType, RowMajorOrder >;
 
       __cuda_callable__
       CSRView();
 
       __cuda_callable__
-      CSRView( const OffsetsView& offsets );
+      CSRView( const OffsetsView& offsets, const KernelView& kernel );
 
       __cuda_callable__
-      CSRView( const OffsetsView&& offsets );
+      CSRView( const OffsetsView&& offsets, const KernelView&& kernel );
 
       __cuda_callable__
       CSRView( const CSRView& csr_view );
@@ -121,7 +128,30 @@ class CSRView
    protected:
 
       OffsetsView offsets;
+
+      KernelView kernel;
 };
+
+template< typename Device,
+          typename Index >
+using CSRViewScalar = CSRView< Device, Index, CSRKernelScalar< Index, Device > >;
+
+template< typename Device,
+          typename Index >
+using CSRViewVector = CSRView< Device, Index, CSRKernelVector< Index, Device > >;
+
+template< typename Device,
+          typename Index >
+using CSRViewHybrid = CSRView< Device, Index, CSRKernelHybrid< Index, Device > >;
+
+template< typename Device,
+          typename Index >
+using CSRViewAdaptive = CSRView< Device, Index, CSRKernelAdaptive< Index, Device > >;
+
+template< typename Device,
+          typename Index >
+using CSRViewDefault = CSRViewScalar< Device, Index >;
+
       } // namespace Segments
    }  // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/Algorithms/Segments/CSRView.hpp b/src/TNL/Algorithms/Segments/CSRView.hpp
index 5537a1233ce47638e37d91aa0ea656f6d7de8f62..045b6bc5a2fe2c13a59cfcdcf7bc6d7234f76f17 100644
--- a/src/TNL/Algorithms/Segments/CSRView.hpp
+++ b/src/TNL/Algorithms/Segments/CSRView.hpp
@@ -22,122 +22,140 @@ namespace TNL {
 
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 CSRView()
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-CSRView< Device, Index >::
-CSRView( const OffsetsView& offsets_view )
-   : offsets( offsets_view )
+CSRView< Device, Index, Kernel >::
+CSRView( const OffsetsView& offsets_view,
+         const KernelView& kernel_view )
+   : offsets( offsets_view ), kernel( kernel_view )
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-CSRView< Device, Index >::
-CSRView( const OffsetsView&& offsets_view )
-   : offsets( offsets_view )
+CSRView< Device, Index, Kernel >::
+CSRView( const OffsetsView&& offsets_view,
+         const KernelView&& kernel_view )
+   : offsets( std::move( offsets_view ) ), kernel( std::move( kernel_view ) )
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 CSRView( const CSRView& csr_view )
-   : offsets( csr_view.offsets )
+   : offsets( csr_view.offsets ), kernel( csr_view.kernel )
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 CSRView( const CSRView&& csr_view )
-   : offsets( std::move( csr_view.offsets ) )
+   : offsets( std::move( csr_view.offsets ) ), kernel( std::move( csr_view.kernel ) )
 {
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 String
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 getSerializationType()
 {
-   return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+   return "CSR< [any_device], " +
+      TNL::getSerializationType< IndexType >() +
+      TNL::getSerializationType< KernelType >() + " >";
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 String
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 getSegmentsType()
 {
    return "CSR";
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
-typename CSRView< Device, Index >::ViewType
-CSRView< Device, Index >::
+typename CSRView< Device, Index, Kernel >::ViewType
+CSRView< Device, Index, Kernel >::
 getView()
 {
-   return ViewType( this->offsets );
+   return ViewType( this->offsets, this->kernel );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
 auto
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 getConstView() const -> const ConstViewType
 {
-   return ConstViewType( this->offsets.getConstView() );
+   return ConstViewType( this->offsets.getConstView(), this->kernel.getConstView() );
 }
 
 template< typename Device,
-          typename Index >
-__cuda_callable__ auto CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+__cuda_callable__ auto CSRView< Device, Index, Kernel >::
 getSegmentsCount() const -> IndexType
 {
    return this->offsets.getSize() - 1;
 }
 
 template< typename Device,
-          typename Index >
-__cuda_callable__ auto CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+__cuda_callable__ auto CSRView< Device, Index, Kernel >::
 getSegmentSize( const IndexType segmentIdx ) const -> IndexType
 {
    return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx );
 }
 
 template< typename Device,
-          typename Index >
-__cuda_callable__ auto CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+__cuda_callable__ auto CSRView< Device, Index, Kernel >::
 getSize() const -> IndexType
 {
    return this->getStorageSize();
 }
 
 template< typename Device,
-          typename Index >
-__cuda_callable__ auto CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+__cuda_callable__ auto CSRView< Device, Index, Kernel >::
 getStorageSize() const -> IndexType
 {
    return details::CSR< Device, Index >::getStorageSize( this->offsets );
 }
 
 template< typename Device,
-          typename Index >
-__cuda_callable__ auto CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+__cuda_callable__ auto CSRView< Device, Index, Kernel >::
 getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType
 {
    if( ! std::is_same< DeviceType, Devices::Host >::value )
@@ -152,20 +170,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 __cuda_callable__
 auto
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType
 {
    return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
    template< typename Function, typename... Args >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 {
    const auto offsetsView = this->offsets;
@@ -181,73 +201,72 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
    template< typename Function, typename... Args >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 forAll( Function& f, Args... args ) const
 {
    this->forSegments( 0, this->getSegmentsCount(), f, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
-   using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType;
-   const auto offsetsView = this->offsets.getConstView();
-   auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable {
-      const IndexType begin = offsetsView[ segmentIdx ];
-      const IndexType end = offsetsView[ segmentIdx + 1 ];
-      RealType aux( zero );
-      IndexType localIdx( 0 );
-      bool compute( true );
-      for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++  )
-         aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) );
-      keeper( segmentIdx, aux );
-   };
-   Algorithms::ParallelFor< Device >::exec( first, last, l, args... );
+   if( std::is_same< DeviceType, TNL::Devices::Host >::value )
+      TNL::Algorithms::Segments::CSRKernelScalar< IndexType, DeviceType >::segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+   else
+      kernel.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
    template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const
 {
    this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... );
 }
 
 template< typename Device,
-          typename Index >
-CSRView< Device, Index >&
-CSRView< Device, Index >::
+          typename Index,
+          typename Kernel >
+CSRView< Device, Index, Kernel >&
+CSRView< Device, Index, Kernel >::
 operator=( const CSRView& view )
 {
    this->offsets.bind( view.offsets );
+   this->kernel = view.kernel;
    return *this;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 save( File& file ) const
 {
    file << this->offsets;
 }
 
 template< typename Device,
-          typename Index >
+          typename Index,
+          typename Kernel >
 void
-CSRView< Device, Index >::
+CSRView< Device, Index, Kernel >::
 load( File& file )
 {
    file >> this->offsets;
+   this->kernel.init( this->offsets );
 }
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelBlockDescriptor.h b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelBlockDescriptor.h
new file mode 100644
index 0000000000000000000000000000000000000000..96f1899b268596bc57ba395cec1556ab5fbdfff5
--- /dev/null
+++ b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelBlockDescriptor.h
@@ -0,0 +1,239 @@
+/***************************************************************************
+                          CSRAdaptiveKernelBlockDescriptor.h -  description
+                             -------------------
+    begin                : Jan 25, 2021 -> Joe Biden inauguration
+    copyright            : (C) 2021 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Algorithms {
+      namespace Segments {
+         namespace details {
+
+enum class Type {
+   /* LONG = 0!!! Non zero value rewrites index[1] */
+   LONG = 0,
+   STREAM = 1,
+   VECTOR = 2
+};
+
+#ifdef CSR_ADAPTIVE_UNION
+template< typename Index >
+union CSRAdaptiveKernelBlockDescriptor
+{
+   CSRAdaptiveKernelBlockDescriptor(Index row, Type type = Type::VECTOR, Index index = 0) noexcept
+   {
+      this->index[0] = row;
+      this->index[1] = index;
+      this->byte[sizeof(Index) == 4 ? 7 : 15] = (uint8_t)type;
+   }
+
+   CSRAdaptiveKernelBlockDescriptor(Index row, Type type, Index nextRow, Index maxID, Index minID) noexcept
+   {
+      this->index[0] = row;
+      this->index[1] = 0;
+      this->twobytes[sizeof(Index) == 4 ? 2 : 4] = maxID - minID;
+
+      if (type == Type::STREAM)
+         this->twobytes[sizeof(Index) == 4 ? 3 : 5] = nextRow - row;
+
+      if (type == Type::STREAM)
+         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b1000000;
+      else if (type == Type::VECTOR)
+         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b10000000;
+   }
+
+   CSRAdaptiveKernelBlockDescriptor() = default;
+
+   __cuda_callable__ Type getType() const
+   {
+      if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b1000000 )
+         return Type::STREAM;
+      if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b10000000 )
+         return Type::VECTOR;
+      return Type::LONG;
+   }
+
+   __cuda_callable__ const Index& getFirstSegment() const
+   {
+      return index[ 0 ];
+   }
+
+   /***
+    * \brief Returns number of elements covered by the block.
+    */
+   __cuda_callable__ const Index getSize() const
+   {
+      return twobytes[ sizeof(Index) == 4 ? 2 : 4 ];
+   }
+
+   /***
+    * \brief Returns number of segments covered by the block.
+    */
+   __cuda_callable__ const Index getSegmentsInBlock() const
+   {
+      return ( twobytes[ sizeof( Index ) == 4 ? 3 : 5 ] & 0x3FFF );
+   }
+
+   void print( std::ostream& str ) const
+   {
+      Type type = this->getType();
+      str << "Type: ";
+      switch( type )
+      {
+         case Type::STREAM:
+            str << " Stream ";
+            break;
+         case Type::VECTOR:
+            str << " Vector ";
+            break;
+         case Type::LONG:
+            str << " Long ";
+            break;
+      }
+      str << " first segment: " << getFirstSegment();
+      str << " block end: " << getSize();
+      str << " index in warp: " << index[ 1 ];
+   }
+   Index index[2]; // index[0] is row pointer, index[1] is index in warp
+   uint8_t byte[sizeof(Index) == 4 ? 8 : 16]; // byte[7/15] is type specificator
+   uint16_t twobytes[sizeof(Index) == 4 ? 4 : 8]; //twobytes[2/4] is maxID - minID
+                                                //twobytes[3/5] is nextRow - row
+};
+#else
+
+template< typename Index >
+struct CSRAdaptiveKernelBlockDescriptor
+{
+   CSRAdaptiveKernelBlockDescriptor( Index firstSegmentIdx,
+                                     Type type = Type::VECTOR,
+                                     uint8_t warpIdx = 0,
+                                     uint8_t warpsCount = 0 ) noexcept
+   {
+      this->firstSegmentIdx = firstSegmentIdx;
+      this->type = ( uint8_t ) type;
+      this->warpIdx = warpIdx;
+      this->warpsCount = warpsCount;
+      /*this->index[0] = row;
+      this->index[1] = index;
+      this->byte[sizeof(Index) == 4 ? 7 : 15] = (uint8_t)type;*/
+   }
+
+   CSRAdaptiveKernelBlockDescriptor( Index firstSegmentIdx,
+                                     Type type,
+                                     Index lastSegmentIdx,
+                                     Index end,
+                                     Index begin ) noexcept
+   {
+      this->firstSegmentIdx = firstSegmentIdx;
+      this->warpIdx = 0;
+      this->blockSize = end - begin;
+      this->segmentsInBlock = lastSegmentIdx - firstSegmentIdx;
+      this->type = ( uint8_t ) type;
+
+      /*this->index[0] = row;
+      this->index[1] = 0;
+      this->twobytes[sizeof(Index) == 4 ? 2 : 4] = maxID - minID;
+
+      if (type == Type::STREAM)
+         this->twobytes[sizeof(Index) == 4 ? 3 : 5] = nextRow - row;
+
+      if (type == Type::STREAM)
+         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b1000000;
+      else if (type == Type::VECTOR)
+         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b10000000;*/
+   }
+
+   CSRAdaptiveKernelBlockDescriptor() = default;
+
+   __cuda_callable__ Type getType() const
+   {
+      return ( Type ) this->type;
+      /*if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b1000000 )
+         return Type::STREAM;
+      if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b10000000 )
+         return Type::VECTOR;
+      return Type::LONG;*/
+   }
+
+   __cuda_callable__ const Index& getFirstSegment() const
+   {
+      return this->firstSegmentIdx;
+      //return index[ 0 ];
+   }
+
+   /***
+    * \brief Returns number of elements covered by the block.
+    */
+   __cuda_callable__ const Index getSize() const
+   {
+      return this->blockSize;
+      //return twobytes[ sizeof(Index) == 4 ? 2 : 4 ];
+   }
+
+   /***
+    * \brief Returns number of segments covered by the block.
+    */
+   __cuda_callable__ const Index getSegmentsInBlock() const
+   {
+      return this->segmentsInBlock;
+      //return ( twobytes[ sizeof( Index ) == 4 ? 3 : 5 ] & 0x3FFF );
+   }
+
+   __cuda_callable__ uint8_t getWarpIdx() const
+   {
+      return this->warpIdx;
+   }
+
+   __cuda_callable__ uint8_t getWarpsCount() const
+   {
+      return this->warpsCount;
+   }
+
+   void print( std::ostream& str ) const
+   {
+      str << "Type: ";
+      switch( this->getType() )
+      {
+         case Type::STREAM:
+            str << " Stream ";
+            break;
+         case Type::VECTOR:
+            str << " Vector ";
+            break;
+         case Type::LONG:
+            str << " Long ";
+            break;
+      }
+      str << " first segment: " << this->getFirstSegment();
+      str << " block end: " << this->getSize();
+      str << " index in warp: " << this->getWarpIdx();
+   }
+
+   uint8_t type;
+   Index firstSegmentIdx, blockSize, segmentsInBlock;
+   uint8_t warpIdx, warpsCount;
+
+   //Index index[2]; // index[0] is row pointer, index[1] is index in warp
+   //uint8_t byte[sizeof(Index) == 4 ? 8 : 16]; // byte[7/15] is type specificator
+   //uint16_t twobytes[sizeof(Index) == 4 ? 4 : 8]; //twobytes[2/4] is maxID - minID
+                                                //twobytes[3/5] is nextRow - row
+};
+
+#endif
+
+template< typename Index >
+std::ostream& operator<< ( std::ostream& str, const CSRAdaptiveKernelBlockDescriptor< Index >& block )
+{
+   block.print( str );
+   return str;
+}
+         } // namespace details
+      } // namespace Segments
+   }  // namespace Algorithms
+} // namespace TNL
diff --git a/src/TNL/Matrices/Legacy/CSR.h b/src/TNL/Matrices/Legacy/CSR.h
index 7570eac8be54c31bf61f364abe2c5a02413b4234..42f68b1277f994197c561ef7a4d000b0e600878e 100644
--- a/src/TNL/Matrices/Legacy/CSR.h
+++ b/src/TNL/Matrices/Legacy/CSR.h
@@ -115,8 +115,11 @@ public:
    static constexpr Index THREADS_VECTOR = 128;
    static constexpr Index THREADS_LIGHT = 128;
 
-   /* Max length of row to process one warp */
-   static constexpr Index MAX_ELEMENTS_PER_WARP = 1024;
+   /* Max length of row to process one warp for CSR Light, MultiVector */
+   static constexpr Index MAX_ELEMENTS_PER_WARP = 384;
+
+   /* Max length of row to process one warp for CSR Adaptive */
+   static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = 512;
 
    /* How many shared memory use per block in CSR Adaptive kernel */
    static constexpr Index SHARED_PER_BLOCK = 24576;
diff --git a/src/TNL/Matrices/Legacy/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h
index 580b63456c2071cb4b27a2c83e5d4e5737b91cd2..7a610c8257cb4450035fa0c46a928b0f84b377f5 100644
--- a/src/TNL/Matrices/Legacy/CSR_impl.h
+++ b/src/TNL/Matrices/Legacy/CSR_impl.h
@@ -143,7 +143,7 @@ Index findLimit(const Index start,
             type = Type::STREAM;
             return current;
          } else {                  // one long row
-            if (sum <= 2 * matrix.MAX_ELEMENTS_PER_WARP)
+            if (sum <= 2 * matrix.MAX_ELEMENTS_PER_WARP_ADAPT)
                type = Type::VECTOR;
             else
                type = Type::LONG;
@@ -1764,8 +1764,8 @@ void SpMVCSRAdaptivePrepare( const Real *inVector,
 
       SpMVCSRAdaptive< Real, Index, warpSize,
             matrix.WARPS,
-            matrix.SHARED_PER_WARP,
-            matrix.MAX_ELEMENTS_PER_WARP >
+            matrix.SHARED_PER_WARP, 
+            matrix.MAX_ELEMENTS_PER_WARP_ADAPT >
          <<<blocks, threads>>>(
                inVector,
                outVector,
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 6d068f370f3be3a2d8b6eea20386054d6c984776..581d79c983d2a0961cfe7576c07e2d2dc9d5e5f9 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -45,7 +45,7 @@ template< typename Real =  double,
           typename Device = Devices::Host,
           typename Index = int,
           typename MatrixType = GeneralMatrix,
-          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Algorithms::Segments::CSR,
+          template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Algorithms::Segments::CSRDefault,
           typename ComputeReal = typename ChooseSparseMatrixComputeReal< Real, Index >::type,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
           typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > >
@@ -209,13 +209,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                     const IndexAllocatorType& indexAllocator = IndexAllocatorType() );
 
       /**
-<<<<<<< HEAD
        * \brief Constructor with matrix rows capacities and number of columns.
        *
-=======
-       * \brief Constructor with matrix rows capacities given as an initializer list and a number of columns.
-       * 
->>>>>>> Added SparseMatrix constructor with row capacities vector.
        * The number of matrix rows is given by the size of \e rowCapacities list.
        *
        * \tparam ListIndex is the initializer list values type.
@@ -238,9 +233,9 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
 
       /**
        * \brief Constructor with matrix rows capacities given as a vector and number of columns.
-       * 
+       *
        * The number of matrix rows is given by the size of \e rowCapacities vector.
-       * 
+       *
        * \tparam RowCapacitiesVector is the row capacities vector type. Usually it is some of
        *    \ref TNL::Containers::Array, \ref TNL::Containers::ArrayView, \ref TNL::Containers::Vector or
        *    \ref TNL::Containers::VectorView.
@@ -249,7 +244,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
        * \param columns is the number of matrix columns.
        * \param realAllocator is used for allocation of matrix elements values.
        * \param indexAllocator is used for allocation of matrix elements column indexes.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_rowCapacities_vector.cpp
        * \par Output
diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h
index f91e471e8a2158de04f46946513e149ac530f338..9b69c2e91c12a7419dbe9764ccae3ceb567602e2 100644
--- a/src/TNL/Matrices/SparseMatrixView.h
+++ b/src/TNL/Matrices/SparseMatrixView.h
@@ -36,10 +36,10 @@ struct ChooseSparseMatrixComputeReal< bool, Index >
  *
  * It serves as an accessor to \ref SparseMatrix for example when passing the
  * matrix to lambda functions. SparseMatrix view can be also created in CUDA kernels.
- * 
- * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated 
+ *
+ * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated
  *    as binary and so the matrix elements values are not stored in the memory since we need
- *    to remember only coordinates of non-zero elements( which equal one). 
+ *    to remember only coordinates of non-zero elements( which equal one).
  * \tparam Device is a device where the matrix is allocated.
  * \tparam Index is a type for indexing of the matrix elements.
  * \tparam MatrixType specifies a symmetry of matrix. See \ref MatrixType. Symmetric
@@ -50,13 +50,13 @@ struct ChooseSparseMatrixComputeReal< bool, Index >
  *    \ref Ellpack, \ref SlicedEllpack, \ref ChunkedEllpack or \ref BiEllpack.
  * \tparam ComputeReal is the same as \e Real mostly but for binary matrices it is set to \e Index type. This can be changed
  *    bu the user, of course.
- * 
+ *
  */
 template< typename Real,
           typename Device = Devices::Host,
           typename Index = int,
           typename MatrixType = GeneralMatrix,
-          template< typename Device_, typename Index_ > class SegmentsView = Algorithms::Segments::CSRView,
+          template< typename Device_, typename Index_ > class SegmentsView = Algorithms::Segments::CSRViewDefault,
           typename ComputeReal = typename ChooseSparseMatrixComputeReal< Real, Index >::type >
 class SparseMatrixView : public MatrixView< Real, Device, Index >
 {
@@ -79,14 +79,14 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Test of symmetric matrix type.
-       * 
+       *
        * \return \e true if the matrix is stored as symmetric and \e false otherwise.
        */
       static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); };
 
       /**
        * \brief Test of binary matrix type.
-       * 
+       *
        * \return \e true if the matrix is stored as binary and \e false otherwise.
        */
       static constexpr bool isBinary() { return std::is_same< Real, bool >::value; };
@@ -120,7 +120,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
       using SegmentsViewType = SegmentsView< Device, Index >;
 
       /**
-       * \brief Type of related matrix view. 
+       * \brief Type of related matrix view.
        */
       using ViewType = SparseMatrixView< std::remove_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >;
 
@@ -158,7 +158,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Constructor with all necessary data and views.
-       * 
+       *
        * \param rows is a number of matrix rows.
        * \param columns is a number of matrix columns.
        * \param values is a vector view with matrix elements values.
@@ -174,7 +174,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Copy constructor.
-       * 
+       *
        * \param matrix is an input sparse matrix view.
        */
       __cuda_callable__
@@ -182,7 +182,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Move constructor.
-       * 
+       *
        * \param matrix is an input sparse matrix view.
        */
       __cuda_callable__
@@ -190,7 +190,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns a modifiable view of the sparse matrix.
-       * 
+       *
        * \return sparse matrix view.
        */
       __cuda_callable__
@@ -198,7 +198,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns a non-modifiable view of the sparse matrix.
-       * 
+       *
        * \return sparse matrix view.
        */
       __cuda_callable__
@@ -206,11 +206,11 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns string with serialization type.
-       * 
+       *
        * The string has a form `Matrices::SparseMatrix< RealType,  [any_device], IndexType, General/Symmetric, Format, [any_allocator] >`.
-       * 
+       *
        * \return \ref String with the serialization type.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixViewExample_getSerializationType.cpp
        * \par Output
@@ -220,11 +220,11 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns string with serialization type.
-       * 
+       *
        * See \ref SparseMatrix::getSerializationType.
-       * 
+       *
        * \return \e String with the serialization type.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp
        * \par Output
@@ -234,10 +234,10 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Computes number of non-zeros in each row.
-       * 
+       *
        * \param rowLengths is a vector into which the number of non-zeros in each row
        * will be stored.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixViewExample_getCompressedRowLengths.cpp
        * \par Output
@@ -248,7 +248,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns capacity of given matrix row.
-       * 
+       *
        * \param row index of matrix row.
        * \return number of matrix elements allocated for the row.
        */
@@ -257,26 +257,26 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Returns number of non-zero matrix elements.
-       * 
+       *
        * This method really counts the non-zero matrix elements and so
        * it returns zero for matrix having all allocated elements set to zero.
-       * 
+       *
        * \return number of non-zero matrix elements.
        */
       IndexType getNonzeroElementsCount() const;
 
       /**
        * \brief Constant getter of simple structure for accessing given matrix row.
-       * 
+       *
        * \param rowIdx is matrix row index.
-       * 
+       *
        * \return RowView for accessing given matrix row.
        *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixViewExample_getConstRow.cpp
        * \par Output
        * \include SparseMatrixViewExample_getConstRow.out
-       * 
+       *
        * See \ref SparseMatrixRowView.
        */
       __cuda_callable__
@@ -284,16 +284,16 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Non-constant getter of simple structure for accessing given matrix row.
-       * 
+       *
        * \param rowIdx is matrix row index.
-       * 
+       *
        * \return RowView for accessing given matrix row.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixViewExample_getRow.cpp
        * \par Output
        * \include SparseMatrixViewExample_getRow.out
-       * 
+       *
        * See \ref SparseMatrixRowView.
        */
       __cuda_callable__
@@ -301,7 +301,7 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
 
       /**
        * \brief Sets element at given \e row and \e column to given \e value.
-       * 
+       *
        * This method can be called from the host system (CPU) no matter
        * where the matrix is allocated. If the matrix is allocated on GPU this method
        * can be called even from device kernels. If the matrix is allocated in GPU device
@@ -309,11 +309,11 @@ class SparseMatrixView : public MatrixView< Real, Device, Index >
        * performance is very low. For higher performance see. \ref SparseMatrix::getRow
        * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows.
        * The call may fail if the matrix row capacity is exhausted.
-       * 
+       *
        * \param row is row index of the element.
        * \param column is columns index of the element.
        * \param value is the value the element will be set to.
-       * 
+       *
        * \par Example
        * \include Matrices/SparseMatrix/SparseMatrixViewExample_setElement.cpp
        * \par Output
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index b031e846d5094955d037b8ed55f21967d524b571..26217620bbbc7ad3226829937be257ba7935d7ff 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -383,8 +383,8 @@ vectorProduct( const InVector& inVector,
    static_assert(
          ! MatrixType::isSymmetric() ||
          ! std::is_same< Device, Devices::Cuda >::value ||
-         ( std::is_same< OutVectorReal, float >::value || 
-           std::is_same< OutVectorReal, double >::value || 
+         ( std::is_same< OutVectorReal, float >::value ||
+           std::is_same< OutVectorReal, double >::value ||
            std::is_same< OutVectorReal, int >::value ||
            std::is_same< OutVectorReal, long long int >::value ),
          "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." );
@@ -484,6 +484,7 @@ rowsReduction( IndexType begin, IndexType end, Fetch& fetch, const Reduce& reduc
    const auto values_view = this->values.getConstView();
    const IndexType paddingIndex_ = this->getPaddingIndex();
    auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), RealType() ) ) {
+      TNL_ASSERT_LT( globalIdx, columns_view.getSize(), "" );
       IndexType columnIdx = columns_view[ globalIdx ];
       if( columnIdx != paddingIndex_ )
       {
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
index a4eb9e8aae26786412fe8945a9ccf2795f6293fa..8791b95e2500cc415e311935348aa791bcc8fd93 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h
@@ -77,7 +77,7 @@ public:
 
 protected:
    // The factors L and U are stored separately and the rows of U are reversed.
-   Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSR > L, U;
+   Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U;
 
    // Specialized methods to distinguish between normal and distributed matrices
    // in the implementation.
diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
index 344daf1a0103a0a93ca576358b2da787d7578f8b..82ab88e862603464ebfb5f8b799a490f9d158e86 100644
--- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
+++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h
@@ -66,7 +66,7 @@ protected:
    Real tau = 1e-4;
 
    // The factors L and U are stored separately and the rows of U are reversed.
-   Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSR > L, U;
+   Matrices::SparseMatrix< RealType, DeviceType, IndexType, Matrices::GeneralMatrix, Algorithms::Segments::CSRDefault > L, U;
 
    // Specialized methods to distinguish between normal and distributed matrices
    // in the implementation.
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
index 8a6e0abddda2cfcb9acd70c6b8ba350cfeb3d28e..609a6afd74cd3e37ab1856529815b79ffd3ab9cf 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h
@@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index
 template< typename Device, typename Index, typename IndexAllocator >
 using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
-using CSR_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
-using CSR_cuda = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
+using CSR_host = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
 using E_host   = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using E_cuda   = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using SE_host  = TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
index 8f7dad73c719fd5121650a4e6170149a69075036..5a4e98915cbec11a8107194b5b3ed33ab26e4e8c 100644
--- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h
@@ -29,11 +29,11 @@ protected:
 // types for which MatrixTest is instantiated
 using CSRMatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >,
-    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR, int >
+   ,TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >,
+    TNL::Matrices::SparseMatrix< bool, TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault, int >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index b713c8f0ca76d534b8abb903097b77b8bc8bd22b..a65411fc065b299fdc86d2e2550f07456257c75e 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -6,7 +6,10 @@ set( COMMON_TESTS
             TridiagonalMatrixTest
             MultidiagonalMatrixTest
 
-            SparseMatrixTest_CSR
+            SparseMatrixTest_CSRScalar
+            SparseMatrixTest_CSRVector
+            SparseMatrixTest_CSRHybrid
+            SparseMatrixTest_CSRAdaptive
             SparseMatrixTest_Ellpack
             SparseMatrixTest_SlicedEllpack
             SparseMatrixTest_ChunkedEllpack
diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
index d86eb57f5cf6fbdaafe51734d9ea834f2bb8823e..dfdcc3b83556183e6e935b21545dbd5b2c8c3347 100644
--- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h
@@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index
 template< typename Device, typename Index, typename IndexAllocator >
 using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
-using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
-using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
 using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
index c9f68b5885849209b0e5d1848a16c313b3a32fbd..826b7af6b6bcb2cf76b7f0a6f81492341bd51fbe 100644
--- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h
@@ -27,8 +27,8 @@ using EllpackSegments = TNL::Algorithms::Segments::Ellpack< Device, Index, Index
 template< typename Device, typename Index, typename IndexAllocator >
 using SlicedEllpackSegments = TNL::Algorithms::Segments::SlicedEllpack< Device, Index, IndexAllocator >;
 
-using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
-using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >;
+using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
+using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRDefault >;
 using E_host   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using E_cuda   = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >;
 using SE_host  = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index b5885afbe83a75ee51eb078c75d1b65e02eaeabc..00794032e6e859462c720e7695f2e311616a0ce9 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -1070,7 +1070,6 @@ void test_VectorProduct()
        outVector_1.setElement( j, 0 );
 
    m_1.vectorProduct( inVector_1, outVector_1 );
-
    EXPECT_EQ( outVector_1.getElement( 0 ),  2 );
    EXPECT_EQ( outVector_1.getElement( 1 ), 10 );
    EXPECT_EQ( outVector_1.getElement( 2 ),  8 );
@@ -1310,7 +1309,7 @@ void test_VectorProduct()
 
    /////
    // Large test
-   const IndexType size( 35 );
+   const IndexType size( 1051 );
    //for( int size = 1; size < 1000; size++ )
    {
       //std::cerr << " size = " << size << std::endl;
@@ -1338,26 +1337,28 @@ void test_VectorProduct()
          EXPECT_EQ( out.getElement( i ), i + 1 );
 
       // Test with large triangular matrix
-      Matrix m2( size, size );
-      rowCapacities.evaluate( [] __cuda_callable__ ( IndexType i ) { return i + 1; } );
+      const int rows( size ), columns( size );
+      Matrix m2( rows, columns );
+      rowCapacities.setSize( rows );
+      rowCapacities.evaluate( [=] __cuda_callable__ ( IndexType i ) { return i + 1; } );
       m2.setRowCapacities( rowCapacities );
       auto f2 = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType& column, RealType& value, bool& compute ) {
          if( localIdx <= row )
          {
-            value = row -localIdx + 1;
+            value = localIdx + 1;
             column = localIdx;
          }
       };
       m2.forAllRows( f2 );
       // check that the matrix was initialized
-      TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( size );
+      TNL::Containers::Vector< IndexType, DeviceType, IndexType > rowLengths( rows );
       m2.getCompressedRowLengths( rowLengths );
       EXPECT_EQ( rowLengths, rowCapacities );
 
+      out.setSize( rows );
       out = 0.0;
       m2.vectorProduct( in, out );
-      //std::cerr << out << std::endl;
-      for( IndexType i = 0; i < size; i++ )
+      for( IndexType i = 0; i < rows; i++ )
          EXPECT_EQ( out.getElement( i ), ( i + 1 ) * ( i + 2 ) / 2 );
    }
 }
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41306c6da762c35be285912d097541e45e2f641b
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRAdaptive.cpp -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRAdaptive.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cu
new file mode 100644
index 0000000000000000000000000000000000000000..50a4333330398f537c95a2b1f17fd3c1c73ba655
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRAdaptive.cu -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRAdaptive.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.h b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.h
new file mode 100644
index 0000000000000000000000000000000000000000..2756868220b220e1bb1e78cea02b9903128ff413
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRAdaptive.h
@@ -0,0 +1,46 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRAdaptive.h -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <iostream>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRAdaptive_segments";
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRAdaptive >
+#endif
+>;
+
+#endif
+
+#include "SparseMatrixTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..214ed2ca7c6990dd03932cfc862f82dc1633f865
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRHybrid.cpp -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRHybrid.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c0a0918d782b7c86dd603927f7886cb5019f8cb2
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRHybrid.cu -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRHybrid.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.h b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.h
new file mode 100644
index 0000000000000000000000000000000000000000..24ba77fa0e87ab13081622ed7cbd52180f035eca
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRHybrid.h
@@ -0,0 +1,46 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRHybrid.h -  description
+                             -------------------
+    begin                : Jan 23, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <iostream>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRHybrid_segments";
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRHybrid >
+#endif
+>;
+
+#endif
+
+#include "SparseMatrixTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cpp
similarity index 78%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
rename to src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cpp
index 91f0de81a928a6f5676b2d839a35496dfdae61c3..0f73d79aae249be2af3328f6a9dc459f5a6cbf3b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_CSR.cu -  description
+                          SparseMatrixTest_CSRScalar.cpp -  description
                              -------------------
     begin                : Dec 3, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
@@ -8,4 +8,4 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "SparseMatrixTest_CSR.h"
+#include "SparseMatrixTest_CSRScalar.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cu
similarity index 78%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
rename to src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cu
index 5830658abd3135064cc46c8a7c534252a0421935..ff22ae692560e45b5fa0d2380a6a71267fba80a9 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.cu
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_CSR.cpp -  description
+                          SparseMatrixTest_CSRScalar.cu -  description
                              -------------------
     begin                : Dec 3, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
@@ -8,4 +8,4 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
-#include "SparseMatrixTest_CSR.h"
+#include "SparseMatrixTest_CSRScalar.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.h
similarity index 89%
rename from src/UnitTests/Matrices/SparseMatrixTest_CSR.h
rename to src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.h
index e090f5f62e920b2e307d551ccc2301e36027b6fb..0902ee81a6400a62131227a7f616a2da7b9d78e9 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRScalar.h
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          SparseMatrixTest_CSR.h -  description
+                          SparseMatrixTest_CSRScalar.h -  description
                              -------------------
     begin                : Dec 2, 2019
     copyright            : (C) 2019 by Tomas Oberhuber et al.
@@ -15,28 +15,28 @@
 #ifdef HAVE_GTEST
 #include <gtest/gtest.h>
 
-const char* saveAndLoadFileName = "test_SparseMatrixTest_CSR_segments";
+const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRScalar_segments";
 
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >
 #ifdef HAVE_CUDA
-   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSR >
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >
 #endif
 >;
 
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c60c5e1f7d39dacb628eec317f8fb8b31366b574
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cpp
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRVector.cpp -  description
+                             -------------------
+    begin                : Jan 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRVector.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cu
new file mode 100644
index 0000000000000000000000000000000000000000..5c78647a1395ac87466c74d7f703cd57b0336e68
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.cu
@@ -0,0 +1,11 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRVector.cu -  description
+                             -------------------
+    begin                : Jan 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include "SparseMatrixTest_CSRVector.h"
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.h b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d50fc686c3f828321f1fe40b97dd8ee2d1c3205
--- /dev/null
+++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRVector.h
@@ -0,0 +1,46 @@
+/***************************************************************************
+                          SparseMatrixTest_CSRVector.h -  description
+                             -------------------
+    begin                : Jan 22, 2021
+    copyright            : (C) 2021 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#include <iostream>
+#include <TNL/Algorithms/Segments/CSR.h>
+#include <TNL/Matrices/SparseMatrix.h>
+
+#ifdef HAVE_GTEST
+#include <gtest/gtest.h>
+
+const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRVector_segments";
+
+// types for which MatrixTest is instantiated
+using MatrixTypes = ::testing::Types
+<
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >
+#ifdef HAVE_CUDA
+   ,TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRVector >
+#endif
+>;
+
+#endif
+
+#include "SparseMatrixTest.h"
+#include "../main.h"
diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
index 439fab7dfec75dcde32bc880abfe114b5a0257fd..5feb97e11cfa36adddf320cf278eb9860c36ff71 100644
--- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
+++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h
@@ -24,31 +24,31 @@
 // types for which MatrixTest is instantiated
 using MatrixTypes = ::testing::Types
 <
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< long,    TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Host, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >
 #ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU.
-   ,//TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >,
-    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSR >
+   ,//TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, int,   TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< int,     TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< long,    TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< float,   TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >,
+    //TNL::Matrices::SparseMatrix< double,  TNL::Devices::Cuda, long,  TNL::Matrices::SymmetricMatrix, TNL::Algorithms::Segments::CSRDefault >
 #endif // HAVE_CUDA
 >;