diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
index 46d323f0235fe48926b87294f3efa0cc42b91e6d..5ade54d02eedd3e2a5c1b2778bc8e3a215058c2c 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.h
@@ -63,13 +63,15 @@ struct CSRAdaptiveKernel
    using BlocksType = typename ViewType::BlocksType;
    using BlocksView = typename BlocksType::ViewType;
 
+   static constexpr int MaxValueSizeLog() { return ViewType::MaxValueSizeLog; };
+
    static TNL::String getKernelType();
 
 
-   static constexpr Index THREADS_ADAPTIVE = details::CSRAdaptiveKernelParameters< Index >::CudaBlockSize(); //sizeof(Index) == 8 ? 128 : 256;
+   static constexpr Index THREADS_ADAPTIVE = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::CudaBlockSize(); //sizeof(Index) == 8 ? 128 : 256;
 
    // How many shared memory use per block in CSR Adaptive kernel
-   static constexpr Index SHARED_PER_BLOCK = details::CSRAdaptiveKernelParameters< Index >::StreamedSharedMemory(); //20000; //24576; TODO:
+   static constexpr Index SHARED_PER_BLOCK = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::StreamedSharedMemory(); //20000; //24576; TODO:
 
    // Number of elements in shared memory 
    static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(double);
@@ -84,14 +86,7 @@ struct CSRAdaptiveKernel
    static constexpr Index MAX_ELEMENTS_PER_WARP = 384;
 
    // Max length of row to process one warp for CSR Adaptive 
-   static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = details::CSRAdaptiveKernelParameters< Index >::MaxAdaptiveElementsPerWarp();
-
-   template< typename Offsets >
-   Index findLimit( const Index start,
-                    const Offsets& offsets,
-                    const Index size,
-                    details::Type &type,
-                    Index &sum );
+   static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = details::CSRAdaptiveKernelParameters< sizeof( Index ) >::MaxAdaptiveElementsPerWarp();
 
    template< typename Offsets >
    void init( const Offsets& offsets );
@@ -118,7 +113,21 @@ struct CSRAdaptiveKernel
                         Args... args ) const;
 
    protected:
-      BlocksType blocks;
+      template< int SizeOfValue, typename Offsets >
+      Index findLimit( const Index start,
+                     const Offsets& offsets,
+                     const Index size,
+                     details::Type &type,
+                     Index &sum );
+
+      template< int SizeOfValue,
+                typename Offsets >
+      void initValueSize( const Offsets& offsets );
+
+      /**
+       * \brief  blocksArray[ i ] stores blocks for sizeof( Value ) == 2^i.
+       */
+      BlocksType blocksArray[ MaxValueSizeLog() ];
 
       ViewType view;
 };
diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
index 4c53a83ca39102c8de7242850c50c0174206dd4f..ff2db147be16312e79260fbd2ed4531570e5816f 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernel.hpp
@@ -31,10 +31,81 @@ getKernelType()
    return ViewType::getKernelType();
 };
 
-
 template< typename Index,
           typename Device >
    template< typename Offsets >
+void
+CSRAdaptiveKernel< Index, Device >::
+init( const Offsets& offsets )
+{
+   this->template initValueSize<  1 >( offsets );
+   this->template initValueSize<  2 >( offsets );
+   this->template initValueSize<  4 >( offsets );
+   this->template initValueSize<  8 >( offsets );
+   this->template initValueSize< 16 >( offsets );
+   this->template initValueSize< 32 >( offsets );
+   for( int i = 0; i < MaxValueSizeLog(); i++ )
+      this->view.setBlocks( blocksArray[ i ], i );
+}
+
+
+template< typename Index,
+          typename Device >
+void
+CSRAdaptiveKernel< Index, Device >::
+reset()
+{
+   for( int i = 0; i < MaxValueSizeLog(); i++ )
+   {
+      this->blocksArray[ i ].reset();
+      this->view.setBlocks( this->blocksArray[ i ], i );
+   }
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRAdaptiveKernel< Index, Device >::
+getView() -> ViewType
+{
+   return this->view;
+}
+
+template< typename Index,
+          typename Device >
+auto
+CSRAdaptiveKernel< Index, Device >::
+getConstView() const -> ConstViewType
+{
+   return this->view;
+};
+
+template< typename Index,
+          typename Device >
+   template< typename OffsetsView,
+               typename Fetch,
+               typename Reduction,
+               typename ResultKeeper,
+               typename Real,
+               typename... Args >
+void
+CSRAdaptiveKernel< Index, Device >::
+segmentsReduction( const OffsetsView& offsets,
+                   Index first,
+                   Index last,
+                   Fetch& fetch,
+                   const Reduction& reduction,
+                   ResultKeeper& keeper,
+                   const Real& zero,
+                   Args... args ) const
+{
+   view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+}
+
+template< typename Index,
+          typename Device >
+   template< int SizeOfValue,
+             typename Offsets >
 Index
 CSRAdaptiveKernel< Index, Device >::
 findLimit( const Index start,
@@ -48,7 +119,7 @@ findLimit( const Index start,
    {
       Index elements = offsets[ current + 1 ] - offsets[ current ];
       sum += elements;
-      if( sum > SHARED_PER_WARP )
+      if( sum > details::CSRAdaptiveKernelParameters< SizeOfValue >::StreamedSharedElementsPerWarp() )
       {
          if( current - start > 0 ) // extra row
          {
@@ -57,7 +128,7 @@ findLimit( const Index start,
          }
          else
          {                  // one long row
-            if( sum <= 2 * MAX_ELEMENTS_PER_WARP_ADAPT )
+            if( sum <= 2 * details::CSRAdaptiveKernelParameters< SizeOfValue >::MaxAdaptiveElementsPerWarp() ) //MAX_ELEMENTS_PER_WARP_ADAPT )
                type = details::Type::VECTOR;
             else
                type = details::Type::LONG;
@@ -67,14 +138,15 @@ findLimit( const Index start,
    }
    type = details::Type::STREAM;
    return size - 1; // return last row pointer
-   }
+}
 
 template< typename Index,
           typename Device >
-   template< typename Offsets >
+   template< int SizeOfValue,
+             typename Offsets >
 void
 CSRAdaptiveKernel< Index, Device >::
-init( const Offsets& offsets )
+initValueSize( const Offsets& offsets )
 {
    using HostOffsetsType = TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >;
    HostOffsetsType hostOffsets( offsets );
@@ -88,7 +160,7 @@ init( const Offsets& offsets )
    while( nextStart != rows - 1 )
    {
       details::Type type;
-      nextStart = findLimit( start, hostOffsets, rows, type, sum );
+      nextStart = findLimit< SizeOfValue >( start, hostOffsets, rows, type, sum );
 
       if( type == details::Type::LONG )
       {
@@ -110,58 +182,10 @@ init( const Offsets& offsets )
       start = nextStart;
    }
    inBlocks.emplace_back(nextStart);
-   this->blocks = inBlocks;
-   this->view.setBlocks( blocks );
-}
-
-template< typename Index,
-          typename Device >
-void
-CSRAdaptiveKernel< Index, Device >::
-reset()
-{
-   this->blocks.reset();
-   this->view.setBlocks( blocks );
-}
-
-template< typename Index,
-          typename Device >
-auto
-CSRAdaptiveKernel< Index, Device >::
-getView() -> ViewType
-{
-   return this->view;
-}
-
-template< typename Index,
-          typename Device >
-auto
-CSRAdaptiveKernel< Index, Device >::
-getConstView() const -> ConstViewType
-{
-   return this->view;
-};
-
-template< typename Index,
-          typename Device >
-   template< typename OffsetsView,
-               typename Fetch,
-               typename Reduction,
-               typename ResultKeeper,
-               typename Real,
-               typename... Args >
-void
-CSRAdaptiveKernel< Index, Device >::
-segmentsReduction( const OffsetsView& offsets,
-                   Index first,
-                   Index last,
-                   Fetch& fetch,
-                   const Reduction& reduction,
-                   ResultKeeper& keeper,
-                   const Real& zero,
-                   Args... args ) const
-{
-   view.segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
+   //std::cerr << "Setting blocks to " << std::log2( SizeOfValue ) << std::endl;
+   TNL_ASSERT_LT( std::log2( SizeOfValue ), MaxValueSizeLog(), "" );
+   TNL_ASSERT_GE( std::log2( SizeOfValue ), 0, "" );
+   this->blocksArray[ (int ) std::log2( SizeOfValue ) ] = inBlocks;
 }
 
       } // namespace Segments
diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.h b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.h
index 431b72f4e37b98cf5a00fc64fdb6ff0e7d727e9c..ea008fdc7c6efc3ae75b76142b4f318dce823791 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.h
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.h
@@ -28,11 +28,13 @@ struct CSRAdaptiveKernelView
    using BlocksType = TNL::Containers::Vector< details::CSRAdaptiveKernelBlockDescriptor< Index >, Device, Index >;
    using BlocksView = typename BlocksType::ViewType;
 
+   static constexpr int MaxValueSizeLog = 6;
+
    CSRAdaptiveKernelView() = default;
 
    CSRAdaptiveKernelView( BlocksType& blocks );
 
-   void setBlocks( BlocksType& blocks );
+   void setBlocks( BlocksType& blocks, const int idx );
 
    ViewType getView();
 
@@ -57,14 +59,14 @@ struct CSRAdaptiveKernelView
 
    CSRAdaptiveKernelView& operator=( const CSRAdaptiveKernelView< Index, Device >& kernelView );
 
-   void printBlocks() const;
+   void printBlocks( int idx ) const;
 
    protected:
-      BlocksView blocks;
+      BlocksView blocksArray[ MaxValueSizeLog ];
 };
 
       } // namespace Segments
    }  // namespace Algorithms
 } // namespace TNL
 
-#include <TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp>
\ No newline at end of file
+#include <TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp>
diff --git a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
index 743f0902f5694aba54647e017f712ceb5c689fae..d4a369f252ffd61be1ade8589a3070bccfc032f7 100644
--- a/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
+++ b/src/TNL/Algorithms/Segments/CSRAdaptiveKernelView.hpp
@@ -50,10 +50,10 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
                                     Real zero,
                                     Args... args )
 {
-   static constexpr int CudaBlockSize = details::CSRAdaptiveKernelParameters< Real >::CudaBlockSize();
-   constexpr int WarpSize = Cuda::getWarpSize();
-   constexpr int WarpsCount = details::CSRAdaptiveKernelParameters< Real >::WarpsCount();
-   constexpr size_t StreamedSharedElementsPerWarp  = details::CSRAdaptiveKernelParameters< Real >::StreamedSharedElementsPerWarp();
+   static constexpr int CudaBlockSize = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize();
+   //constexpr int WarpSize = Cuda::getWarpSize();
+   //constexpr int WarpsCount = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::WarpsCount();
+   //constexpr size_t StreamedSharedElementsPerWarp  = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::StreamedSharedElementsPerWarp();
 
 
    __shared__ Real streamShared[ WARPS ][ SHARED_PER_WARP ];
@@ -199,21 +199,21 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
 }
 #endif
 
-template< typename Index,
+/*template< typename Index,
           typename Device >
 CSRAdaptiveKernelView< Index, Device >::
 CSRAdaptiveKernelView( BlocksType& blocks )
 {
    this->blocks.bind( blocks );
-}
+}*/
 
 template< typename Index,
           typename Device >
 void
 CSRAdaptiveKernelView< Index, Device >::
-setBlocks( BlocksType& blocks )
+setBlocks( BlocksType& blocks, const int idx )
 {
-   this->blocks.bind( blocks );
+   this->blocksArray[ idx ].bind( blocks );
 }
 
 template< typename Index,
@@ -263,23 +263,25 @@ segmentsReduction( const OffsetsView& offsets,
                    Args... args ) const
 {
 #ifdef HAVE_CUDA
-   if( details::CheckFetchLambda< Index, Fetch >::hasAllParameters() )
+   int valueSizeLog = std::ceil( log2f( ( double ) sizeof( Real ) ) );
+
+   if( details::CheckFetchLambda< Index, Fetch >::hasAllParameters() || valueSizeLog > MaxValueSizeLog )
    {
       TNL::Algorithms::Segments::CSRScalarKernel< Index, Device >::
          segmentsReduction( offsets, first, last, fetch, reduction, keeper, zero, args... );
       return;
    }
 
-   static constexpr Index THREADS_ADAPTIVE = details::CSRAdaptiveKernelParameters< Real >::CudaBlockSize(); //sizeof(Index) == 8 ? 128 : 256;
+   static constexpr Index THREADS_ADAPTIVE = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::CudaBlockSize(); //sizeof(Index) == 8 ? 128 : 256;
 
    /* Max length of row to process one warp for CSR Light, MultiVector */
    //static constexpr Index MAX_ELEMENTS_PER_WARP = 384;
 
    /* Max length of row to process one warp for CSR Adaptive */
-   static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = details::CSRAdaptiveKernelParameters< Real >::MaxAdaptiveElementsPerWarp();
+   //static constexpr Index MAX_ELEMENTS_PER_WARP_ADAPT = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::MaxAdaptiveElementsPerWarp();
 
    /* How many shared memory use per block in CSR Adaptive kernel */
-   static constexpr Index SHARED_PER_BLOCK = details::CSRAdaptiveKernelParameters< Real >::StreamedSharedMemory();
+   static constexpr Index SHARED_PER_BLOCK = details::CSRAdaptiveKernelParameters< sizeof( Real ) >::StreamedSharedMemory();
 
    /* Number of elements in shared memory */
    static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(Real);
@@ -298,7 +300,7 @@ segmentsReduction( const OffsetsView& offsets,
    constexpr size_t MAX_X_DIM = 2147483647;
 
    /* Fill blocks */
-   size_t neededThreads = this->blocks.getSize() * warpSize; // one warp per block
+   size_t neededThreads = this->blocksArray[ valueSizeLog ].getSize() * warpSize; // one warp per block
    /* Execute kernels on device */
    for (Index gridIdx = 0; neededThreads != 0; gridIdx++ )
    {
@@ -317,12 +319,12 @@ segmentsReduction( const OffsetsView& offsets,
             warpSize,
             WARPS,
             SHARED_PER_WARP,
-            details::CSRAdaptiveKernelParameters< Real >::MaxAdaptiveElementsPerWarp(),
+            details::CSRAdaptiveKernelParameters< sizeof( Real ) >::MaxAdaptiveElementsPerWarp(),
             BlocksView,
             OffsetsView,
             Index, Fetch, Reduction, ResultKeeper, Real, Args... >
          <<<blocksCount, threads>>>(
-            this->blocks,
+            this->blocksArray[ valueSizeLog ],
             gridIdx,
             offsets,
             first,
@@ -342,7 +344,8 @@ CSRAdaptiveKernelView< Index, Device >&
 CSRAdaptiveKernelView< Index, Device >::
 operator=( const CSRAdaptiveKernelView< Index, Device >& kernelView )
 {
-   this->blocks.bind( kernelView.blocks );
+   for( int i = 0; i < MaxValueSizeLog; i++ )
+      this->blocksArray[ i ].bind( kernelView.blocksArray[ i ] );
    return *this;
 }
 
@@ -350,8 +353,9 @@ template< typename Index,
           typename Device >
 void
 CSRAdaptiveKernelView< Index, Device >::
-printBlocks() const
+printBlocks( int idx ) const
 {
+   auto& blocks = this->blocksArray[ idx ];
    for( Index i = 0; i < this->blocks.getSize(); i++ )
    {
       auto block = blocks.getElement( i );
diff --git a/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h
index f9dedbaf0bbafb5f346102a0de5abdd8a80ab05a..2546580f8f3492460460748f07a28a17ce9dbcdf 100644
--- a/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h
+++ b/src/TNL/Algorithms/Segments/details/CSRAdaptiveKernelParameters.h
@@ -15,7 +15,7 @@ namespace TNL {
       namespace Segments {
          namespace details {
 
-template< typename Value,
+template< int SizeOfValue,
           int StreamedSharedMemory_ = 24576 >
 struct CSRAdaptiveKernelParameters
 {
@@ -37,7 +37,7 @@ struct CSRAdaptiveKernelParameters
    /**
     * \brief Number of elements fitting into streamed shared memory.
     */
-   static constexpr size_t StreamedSharedElementsCount() { return StreamedSharedMemory() / sizeof( Value ); };
+   static constexpr size_t StreamedSharedElementsCount() { return StreamedSharedMemory() / SizeOfValue; };
 
    /**
     * \brief Computes number of warps in one CUDA block.