From 9a565126aca84cb100eb7d19b2274b6ff10b2ddd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Thu, 9 Jan 2020 22:11:58 +0100
Subject: [PATCH] Reimplementing tridiagonal matrix.

---
 src/TNL/Matrices/SparseMatrix.hpp             |   1 -
 src/TNL/Matrices/Tridiagonal.h                |  62 +-
 src/TNL/Matrices/Tridiagonal.hpp              | 303 +++++--
 src/TNL/Matrices/TridiagonalMatrixRowView.h   |  59 ++
 src/TNL/Matrices/TridiagonalMatrixRowView.hpp |  75 ++
 src/TNL/Matrices/TridiagonalMatrixView.h      | 255 +++---
 src/TNL/Matrices/TridiagonalMatrixView.hpp    | 798 ++++++++----------
 src/TNL/Matrices/TridiagonalRow.h             |  51 --
 src/TNL/Matrices/TridiagonalRow_impl.h        |  78 --
 .../details/TridiagonalMatrixIndexer.h        |  90 ++
 .../Matrices/TridiagonalMatrixTest.h          |   6 +-
 11 files changed, 964 insertions(+), 814 deletions(-)
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.h
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixRowView.hpp
 delete mode 100644 src/TNL/Matrices/TridiagonalRow.h
 delete mode 100644 src/TNL/Matrices/TridiagonalRow_impl.h
 create mode 100644 src/TNL/Matrices/details/TridiagonalMatrixIndexer.h

diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 8dbe53f4dc..6189d43d37 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -515,7 +515,6 @@ forRows( IndexType first, IndexType last, Function& function )
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index f80bc4c185..51e05c8992 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -12,15 +12,14 @@
 
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
 #include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
 
 namespace TNL {
 namespace Matrices {
 
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
 template< typename Real = double,
           typename Device = Devices::Host,
           typename Index = int,
@@ -28,27 +27,23 @@ template< typename Real = double,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > >
 class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 {
-   private:
-      // convenient template alias for controlling the selection of copy-assignment operator
-      template< typename Device2 >
-      using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-      // friend class will be needed for templated assignment operators
-      template< typename Real2, typename Device2, typename Index2 >
-      friend class Tridiagonal;
-
    public:
       using RealType = Real;
       using DeviceType = Device;
       using IndexType = Index;
       using RealAllocatorType = RealAllocator;
       using BaseType = Matrix< Real, Device, Index, RealAllocator >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
-      //using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
-      //using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
-      using RowView = TridiagonalMatrixRowView< SegmentViewType, ValuesViewType >;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
       template< typename _Real = Real,
                 typename _Device = Device,
@@ -70,7 +65,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       void setDimensions( const IndexType rows,
                           const IndexType columns );
 
-      void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      //template< typename Vector >
+      void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities );
 
       template< typename Vector >
       void getCompressedRowLengths( Vector& rowLengths ) const;
@@ -80,8 +76,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       IndexType getMaxRowLength() const;
 
-      template< typename Real2, typename Device2, typename Index2 >
-      void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m );
 
       IndexType getNumberOfMatrixElements() const;
 
@@ -91,11 +87,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void reset();
 
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
+
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const;
 
-      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
-      bool operator != ( const Tridiagonal< Real_, Device_, Index_ >& matrix ) const;
+      RowView getRow( const IndexType& rowIdx );
+
+      const RowView getRow( const IndexType& rowIdx ) const;
 
       void setValue( const RealType& v );
 
@@ -139,8 +139,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       void vectorProduct( const InVector& inVector,
                           OutVector& outVector ) const;
 
-      template< typename Real2, typename Index2 >
-      void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
                       const RealType& matrixMultiplicator = 1.0,
                       const RealType& thisMatrixMultiplicator = 1.0 );
 
@@ -159,9 +159,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
       Tridiagonal& operator=( const Tridiagonal& matrix );
 
       // cross-device copy assignment
-      template< typename Real2, typename Device2, typename Index2,
-                typename = typename Enabler< Device2 >::type >
-      Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+      Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix );
 
       void save( File& file ) const;
 
@@ -177,12 +176,9 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       __cuda_callable__
       IndexType getElementIndex( const IndexType row,
-                                 const IndexType column ) const;
-
-      Containers::Vector< RealType, DeviceType, IndexType > values;
+                                 const IndexType localIdx ) const;
 
-      typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-      friend class TridiagonalDeviceDependentCode< DeviceType >;
+      IndexerType indexer;
 };
 
 } // namespace Matrices
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index c36edec0b7..a7178f86ed 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -1,5 +1,5 @@
 /***************************************************************************
-                          Tridiagonal_impl.h  -  description
+                          Tridiagonal.hpp  -  description
                              -------------------
     begin                : Nov 30, 2013
     copyright            : (C) 2013 by Tomas Oberhuber
@@ -41,6 +41,30 @@ Tridiagonal( const IndexType rows, const IndexType columns )
    this->setDimensions( rows, columns );
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getView() -> ViewType
+{
+   return ViewType( this->values.getView(), indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getConstView() const -> ConstViewType
+{
+   return ConstViewType( this->values.getConstView(), indexer );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -78,7 +102,8 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setDimensions( const IndexType rows, const IndexType columns )
 {
    Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   values.setSize( 3*min( rows, columns ) );
+   this->indexer.setDimensions( rows, columns );
+   this->values.setSize( this->indexer.getStorageSize() );
    this->values = 0.0;
 }
 
@@ -87,24 +112,24 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
+ //  template< typename Vector >
 void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths )
 {
-   if( rowLengths[ 0 ] > 2 )
+   if( max( rowLengths ) > 3 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( rowLengths.getElement( 0 ) > 2 )
       throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   for( Index i = 1; i < diagonalLength-1; i++ )
-      if( rowLengths[ i ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() > this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 1 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 1 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() == this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 2 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 2 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
    if( this->getRows() < this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 3 )
+      if( rowLengths.getElement( this->getRows()-1 ) > 3 )
          throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
 }
 
@@ -146,10 +171,10 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2 >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m )
 {
    this->setDimensions( m.getRows(), m.getColumns() );
 }
@@ -250,6 +275,32 @@ setValue( const RealType& v )
    this->values = v;
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) const -> const RowView
+{
+   return RowView( this->values.getView(), this->indexer );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+__cuda_callable__
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getRow( const IndexType& rowIdx ) -> RowView
+{
+   return RowView( this->values.getView(), this->indexer );
+}
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -259,6 +310,12 @@ bool
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
 }
@@ -275,6 +332,12 @@ addElement( const IndexType row,
             const RealType& value,
             const RealType& thisElementMultiplicator )
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
@@ -289,6 +352,11 @@ Real
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+
    if( abs( column - row ) > 1 )
       return 0.0;
    return this->values.getElement( this->getElementIndex( row, column ) );
@@ -304,7 +372,46 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
+                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+
+         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
+                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -317,7 +424,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -330,7 +437,45 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
-
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -343,6 +488,45 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
@@ -355,7 +539,7 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function ) const
 {
-
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
@@ -368,45 +552,9 @@ void
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forAllRows( Function& function )
 {
-
-}
-
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex )
-{
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
-template< typename Real,
-          typename Device,
-          typename Index,
-          bool RowMajorOrder,
-          typename RealAllocator >
-__cuda_callable__
-const typename Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::MatrixRow
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getRow( const IndexType rowIndex ) const
-{
-   throw Exceptions::NotImplementedError();
-}
-
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -414,8 +562,9 @@ template< typename Real,
           typename RealAllocator >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row,
-                                                                                         const Vector& vector ) const
+typename Vector::RealType 
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
    return TridiagonalDeviceDependentCode< Device >::
              rowVectorProduct( this->rows,
@@ -431,8 +580,9 @@ template< typename Real,
           typename RealAllocator >
    template< typename InVector,
              typename OutVector >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector,
-                                                                 OutVector& outVector ) const
+void 
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -441,7 +591,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorPro
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
@@ -449,10 +599,12 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                    const RealType& matrixMultiplicator,
-                                                    const RealType& thisMatrixMultiplicator )
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
             std::cerr << "This matrix columns: " << this->getColumns() << std::endl
@@ -582,13 +734,14 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-   template< typename Real2, typename Device2, typename Index2, typename >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >&
-Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
                   "unknown device" );
 
    this->setLike( matrix );
@@ -605,7 +758,6 @@ template< typename Real,
 void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const
 {
    Matrix< Real, Device, Index >::save( file );
-   file << this->values;
 }
 
 template< typename Real,
@@ -616,7 +768,7 @@ template< typename Real,
 void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file )
 {
    Matrix< Real, Device, Index >::load( file );
-   file >> this->values;
+   this->indexer.setDimensions( this->getRows(), this->getColumns() );
 }
 
 template< typename Real,
@@ -662,17 +814,17 @@ template< typename Real,
           bool RowMajorOrder,
           typename RealAllocator >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementIndex( const IndexType row,
-                                                                    const IndexType column ) const
+Index Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
-   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
-              std::cerr << " this->rows = " << this->rows
-                   << " row = " << row << " column = " << column );
-   TNL_ASSERT( abs( row - column ) < 2,
-              std::cerr << "row = " << row << " column = " << column << std::endl );
-   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return this->indexer.getGlobalIndex( row, localIdx );
 }
 
+/*
 template<>
 class TridiagonalDeviceDependentCode< Devices::Host >
 {
@@ -774,6 +926,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda >
          MatrixVectorProductCuda( matrix, inVector, outVector );
       }
 };
+ */
 
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h
new file mode 100644
index 0000000000..e77d826e05
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h
@@ -0,0 +1,59 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.h  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView,
+          typename Indexer >
+class TridiagonalMatrixRowView
+{
+   public:
+
+      using RealType = typename ValuesView::RealType;
+      using IndexType = typename ValuesView::IndexType;
+      using ValuesViewType = ValuesView;
+      using IndexerType = Indexer;
+
+      __cuda_callable__
+      TridiagonalMatrixRowView( const IndexType rowIdx,
+                                const ValuesViewType& values,
+                                const IndexerType& indexer );
+
+      __cuda_callable__
+      IndexType getSize() const;
+
+      __cuda_callable__
+      const IndexType getColumnIndex( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      const RealType& getValue( const IndexType localIdx ) const;
+
+      __cuda_callable__
+      RealType& getValue( const IndexType localIdx );
+
+      __cuda_callable__
+      void setElement( const IndexType localIdx,
+                       const RealType& value );
+   protected:
+
+      IndexType rowIdx;
+
+      ValuesViewType values;
+
+      Indexer indexer;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/TridiagonalMatrixRowView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
new file mode 100644
index 0000000000..ba60876b9b
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp
@@ -0,0 +1,75 @@
+/***************************************************************************
+                          TridiagonalMatrixRowView.hpp  -  description
+                             -------------------
+    begin                : Dec 31, 2014
+    copyright            : (C) 2014 by oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+TridiagonalMatrixRowView( const IndexType rowIdx,
+                          const ValuesViewType& values,
+                          const IndexerType& indexer )
+: rowIdx( rowIdx ), values( values ), indexer( indexer )
+{
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getSize() const -> IndexType
+{
+   return indexer.getRowSize();
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getColumnIndex( const IndexType localIdx ) const -> const IndexType
+{
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return rowIdx + localIdx - 1;
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) const -> const RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+auto
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+getValue( const IndexType localIdx ) -> RealType&
+{
+   return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ];
+}
+
+template< typename ValuesView, typename Indexer >
+__cuda_callable__
+void 
+TridiagonalMatrixRowView< ValuesView, Indexer >::
+setElement( const IndexType localIdx,
+            const RealType& value )
+{
+   this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value;
+}
+
+} // namespace Matrices
+} // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 3f57fe1c3e..05f7663c9f 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          Tridiagonal.h  -  description
+                          TridiagonalMatrixView.h  -  description
                              -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -10,200 +10,163 @@
 
 #pragma once
 
-#include <TNL/Matrices/Matrix.h>
+#include <TNL/Matrices/MatrixView.h>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Matrices/TridiagonalRow.h>
+#include <TNL/Matrices/TridiagonalMatrixRowView.h>
+#include <TNL/Containers/Segments/Ellpack.h>
+#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h>
 
 namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
+namespace Matrices {
 
 template< typename Real = double,
           typename Device = Devices::Host,
-          typename Index = int >
-class Tridiagonal : public Matrix< Real, Device, Index >
+          typename Index = int,
+          bool RowMajorOrder = std::is_same< Device, Devices::Host >::value >
+class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 {
-private:
-   // convenient template alias for controlling the selection of copy-assignment operator
-   template< typename Device2 >
-   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
-
-   // friend class will be needed for templated assignment operators
-   template< typename Real2, typename Device2, typename Index2 >
-   friend class Tridiagonal;
-
-public:
-   typedef Real RealType;
-   typedef Device DeviceType;
-   typedef Index IndexType;
-   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
-   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
-   typedef Matrix< Real, Device, Index > BaseType;
-   typedef TridiagonalRow< Real, Index > MatrixRow;
-
-   template< typename _Real = Real,
-             typename _Device = Device,
-             typename _Index = Index >
-   using Self = Tridiagonal< _Real, _Device, _Index >;
-
-   Tridiagonal();
-
-   static String getSerializationType();
-
-   virtual String getSerializationTypeVirtual() const;
+   public:
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+      using BaseType = MatrixView< Real, Device, Index >;
+      using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >;
+      using ValuesViewType = typename BaseType::ValuesView;
+      using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >;
+      using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >;
+      using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >;
 
-   void setDimensions( const IndexType rows,
-                       const IndexType columns );
+      // TODO: remove this - it is here only for compatibility with original matrix implementation
+      typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector;
+      typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView;
+      typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView;
 
-   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+      template< typename _Real = Real,
+                typename _Device = Device,
+                typename _Index = Index,
+                bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value >
+      using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >;
 
-   IndexType getRowLength( const IndexType row ) const;
+      TridiagonalMatrixView();
 
-   __cuda_callable__
-   IndexType getRowLengthFast( const IndexType row ) const;
+      TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer );
 
-   IndexType getMaxRowLength() const;
+      ViewType getView();
 
-   template< typename Real2, typename Device2, typename Index2 >
-   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+      ConstViewType getConstView() const;
 
-   IndexType getNumberOfMatrixElements() const;
+      static String getSerializationType();
 
-   IndexType getNumberOfNonzeroMatrixElements() const;
+      virtual String getSerializationTypeVirtual() const;
 
-   IndexType getMaxRowlength() const;
+      void setDimensions( const IndexType rows,
+                          const IndexType columns );
 
-   void reset();
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      [[deprecated]]
+      IndexType getRowLength( const IndexType row ) const;
 
-   template< typename Real2, typename Device2, typename Index2 >
-   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+      IndexType getMaxRowLength() const;
 
-   void setValue( const RealType& v );
+      IndexType getNumberOfMatrixElements() const;
 
-   __cuda_callable__
-   bool setElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value );
+      IndexType getNumberOfNonzeroMatrixElements() const;
 
-   bool setElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value );
+      IndexType getMaxRowlength() const;
 
-   __cuda_callable__
-   bool addElementFast( const IndexType row,
-                        const IndexType column,
-                        const RealType& value,
-                        const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
-   bool addElement( const IndexType row,
-                    const IndexType column,
-                    const RealType& value,
-                    const RealType& thisElementMultiplicator = 1.0 );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;
 
-   __cuda_callable__
-   bool setRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements );
+      RowView getRow( const IndexType& rowIdx );
 
-   bool setRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements );
+      const RowView getRow( const IndexType& rowIdx ) const;
 
-   __cuda_callable__
-   bool addRowFast( const IndexType row,
-                    const IndexType* columns,
-                    const RealType* values,
-                    const IndexType elements,
-                    const RealType& thisRowMultiplicator = 1.0 );
+      void setValue( const RealType& v );
 
-   bool addRow( const IndexType row,
-                const IndexType* columns,
-                const RealType* values,
-                const IndexType elements,
-                const RealType& thisRowMultiplicator = 1.0 );
+      bool setElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value );
 
-   __cuda_callable__
-   RealType getElementFast( const IndexType row,
-                            const IndexType column ) const;
+      bool addElement( const IndexType row,
+                       const IndexType column,
+                       const RealType& value,
+                       const RealType& thisElementMultiplicator = 1.0 );
 
-   RealType getElement( const IndexType row,
-                        const IndexType column ) const;
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
 
-   __cuda_callable__
-   void getRowFast( const IndexType row,
-                    IndexType* columns,
-                    RealType* values ) const;
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   MatrixRow getRow( const IndexType rowIndex );
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
 
-   __cuda_callable__
-   const MatrixRow getRow( const IndexType rowIndex ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
 
-   template< typename Vector >
-   __cuda_callable__
-   typename Vector::RealType rowVectorProduct( const IndexType row,
-                                               const Vector& vector ) const;
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
 
-   template< typename InVector,
-             typename OutVector >
-   void vectorProduct( const InVector& inVector,
-                       OutVector& outVector ) const;
+      template< typename Function >
+      void forAllRows( Function& function ) const;
 
-   template< typename Real2, typename Index2 >
-   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                   const RealType& matrixMultiplicator = 1.0,
-                   const RealType& thisMatrixMultiplicator = 1.0 );
+      template< typename Function >
+      void forAllRows( Function& function );
 
-   template< typename Real2, typename Index2 >
-   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
 
-   template< typename Vector1, typename Vector2 >
-   __cuda_callable__
-   void performSORIteration( const Vector1& b,
-                             const IndexType row,
-                             Vector2& x,
-                             const RealType& omega = 1.0 ) const;
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector ) const;
 
-   // copy assignment
-   Tridiagonal& operator=( const Tridiagonal& matrix );
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+                      const RealType& matrixMultiplicator = 1.0,
+                      const RealType& thisMatrixMultiplicator = 1.0 );
 
-   // cross-device copy assignment
-   template< typename Real2, typename Device2, typename Index2,
-             typename = typename Enabler< Device2 >::type >
-   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+      template< typename Real2, typename Index2 >
+      void getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                             const RealType& matrixMultiplicator = 1.0 );
 
-   void save( File& file ) const;
+      template< typename Vector1, typename Vector2 >
+      __cuda_callable__
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
 
-   void load( File& file );
+      // copy assignment
+      TridiagonalMatrixView& operator=( const TridiagonalMatrixView& matrix );
 
-   void save( const String& fileName ) const;
+      // cross-device copy assignment
+      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+      TridiagonalMatrixView& operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix );
 
-   void load( const String& fileName );
+      void save( File& file ) const;
 
-   void print( std::ostream& str ) const;
+      void save( const String& fileName ) const;
 
-protected:
+      void print( std::ostream& str ) const;
 
-   __cuda_callable__
-   IndexType getElementIndex( const IndexType row,
-                              const IndexType column ) const;
+   protected:
 
-   Containers::Vector< RealType, DeviceType, IndexType > values;
+      __cuda_callable__
+      IndexType getElementIndex( const IndexType row,
+                                 const IndexType localIdx ) const;
 
-   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
-   friend class TridiagonalDeviceDependentCode< DeviceType >;
+      IndexerType indexer;
 };
 
 } // namespace Matrices
 } // namespace TNL
 
-#include <TNL/Matrices/Tridiagonal_impl.h>
+#include <TNL/Matrices/TridiagonalMatrixView.hpp>
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 2752f68503..ef893295e8 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -1,8 +1,8 @@
 /***************************************************************************
-                          Tridiagonal_impl.h  -  description
+                          TridiagonalMatrixView.hpp  -  description
                              -------------------
-    begin                : Nov 30, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
     email                : tomas.oberhuber@fjfi.cvut.cz
  ***************************************************************************/
 
@@ -11,109 +11,85 @@
 #pragma once
 
 #include <TNL/Assert.h>
-#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Matrices/TridiagonalMatrixView.h>
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
-
-template< typename Device >
-class TridiagonalDeviceDependentCode;
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >::Tridiagonal()
-{
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Tridiagonal< " ) +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
+namespace Matrices {
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView()
 {
-   return this->getType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationType()
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer )
+: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer )
 {
-   return String( "Matrices::Tridiagonal< " ) +
-          getType< RealType >() + ", " +
-          getType< Device >() + ", " +
-          getType< IndexType >() + " >";
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getView() -> ViewType
 {
-   return this->getSerializationType();
+   return ViewType( this->values.getView(), indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
-                                                        const IndexType columns )
+          typename Index,
+          bool RowMajorOrder >
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getConstView() const -> ConstViewType
 {
-   Matrix< Real, Device, Index >::setDimensions( rows, columns );
-   values.setSize( 3*min( rows, columns ) );
-   this->values.setValue( 0.0 );
+   return ConstViewType( this->values.getConstView(), indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationType()
 {
-   if( rowLengths[ 0 ] > 2 )
-      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
-   for( Index i = 1; i < diagonalLength-1; i++ )
-      if( rowLengths[ i ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() > this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 1 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() == this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 2 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
-   if( this->getRows() < this->getColumns() )
-      if( rowLengths[ this->getRows()-1 ] > 3 )
-         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   return String( "Matrices::Tridiagonal< " ) +
+          TNL::getSerializationType< RealType >() + ", [any_device], " +
+          TNL::getSerializationType< IndexType >() + ", " +
+          ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >";
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder >
+String
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getSerializationTypeVirtual() const
 {
-   return this->getRowLengthFast( row );
+   return this->getSerializationType();
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRowLength( const IndexType row ) const
 {
    const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
    if( row == 0 )
@@ -129,46 +105,47 @@ Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getMaxRowLength() const
 {
    return 3;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
-{
-   this->setDimensions( m.getRows(), m.getColumns() );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfMatrixElements() const
 {
    return 3 * min( this->getRows(), this->getColumns() );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
+          typename Index,
+          bool RowMajorOrder >
+Index
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getNumberOfNonzeroMatrixElements() const
 {
-   IndexType nonzeroElements = 0;
-   for( IndexType i = 0; i < this->values.getSize(); i++ )
-      if( this->values.getElement( i ) != 0 )
-         nonzeroElements++;
-   return nonzeroElements;
+   const auto values_view = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( values_view[ i ] != 0.0 );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 Index
-Tridiagonal< Real, Device, Index >::
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 getMaxRowlength() const
 {
    return 3;
@@ -176,84 +153,103 @@ getMaxRowlength() const
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::reset()
-{
-   Matrix< Real, Device, Index >::reset();
-   this->values.reset();
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
+{
+   if( RowMajorOrder == RowMajorOrder_ )
+      return this->values == matrix.values;
+   else
+   {
+      TNL_ASSERT( false, "TODO" );
+   }
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const
 {
-   return this->values == matrix.values;
+   return ! this->operator==( matrix );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2 >
-bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setValue( const RealType& v )
 {
-   return this->values != matrix.values;
+   this->values = v;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) const -> const RowView
 {
-   this->values.setValue( v );
+   return RowView( rowIdx, this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 __cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value )
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getRow( const IndexType& rowIdx ) -> RowView
 {
-   this->values[ this->getElementIndex( row, column ) ] = value;
-   return true;
+   return RowView( rowIdx, this->values.getView(), this->indexer );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value )
-{
+          typename Index,
+          bool RowMajorOrder >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+setElement( const IndexType row, const IndexType column, const RealType& value )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    this->values.setElement( this->getElementIndex( row, column ), value );
    return true;
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
-                                                                  const IndexType column,
-                                                                  const RealType& value,
-                                                                  const RealType& thisElementMultiplicator )
-{
-   const Index i = this->getElementIndex( row, column );
-   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
-   return true;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
-                                                              const IndexType column,
-                                                              const RealType& value,
-                                                              const RealType& thisElementMultiplicator )
-{
+          typename Index,
+          bool RowMajorOrder >
+bool
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addElement( const IndexType row,
+            const IndexType column,
+            const RealType& value,
+            const RealType& thisElementMultiplicator )
+{
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
+   if( abs( row - column ) > 1 )
+      throw std::logic_error( "Wrong matrix element coordinates in tridiagonal matrix." );
    const Index i = this->getElementIndex( row, column );
    this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
    return true;
@@ -261,180 +257,230 @@ bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRowFast( row, columns, values, elements, 0.0 );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements )
+          typename Index,
+          bool RowMajorOrder >
+Real
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElement( const IndexType row, const IndexType column ) const
 {
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   return this->addRow( row, columns, values, elements, 0.0 );
-}
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( column, 0, "" );
+   TNL_ASSERT_LT( column, this->getColumns(), "" );
 
-template< typename Real,
-          typename Device,
-          typename Index >
-__cuda_callable__
-bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
-                                                              const IndexType* columns,
-                                                              const RealType* values,
-                                                              const IndexType elements,
-                                                              const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType& column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElementFast( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
-                                                          const IndexType* columns,
-                                                          const RealType* values,
-                                                          const IndexType elements,
-                                                          const RealType& thisRowMultiplicator )
-{
-   TNL_ASSERT( elements <= this->columns,
-            std::cerr << " elements = " << elements
-                 << " this->columns = " << this->columns );
-   if( elements > 3 )
-      return false;
-   for( IndexType i = 0; i < elements; i++ )
-   {
-      const IndexType column = columns[ i ];
-      if( column < row - 1 || column > row + 1 )
-         return false;
-      addElement( row, column, values[ i ], thisRowMultiplicator );
-   }
-   return true;
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         keep( 0, reduce( fetch( 0, 0, i_0, values_view[ i_0 ] ),
+                          fetch( 0, 1, i_1, values_view[ i_1 ] ) ) );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+
+         keep( rowIdx, reduce( reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                                       fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ),
+                               fetch( rowIdx, rowIdx + 1, i_2, values_view[ i_2] ) ) );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         keep( rowIdx, reduce( fetch( rowIdx, rowIdx - 1, i_0, values_view[ i_0 ] ),
+                               fetch( rowIdx, rowIdx, i_1, values_view[ i_1 ] ) ) );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         keep( rowIdx, fetch( rowIdx, rowIdx, i_0, values_view[ i_0 ] ) );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
-                                                                  const IndexType column ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values[ this->getElementIndex( row, column ) ];
+   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
-                                                              const IndexType column ) const
-{
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
+      }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
-                                                              IndexType* columns,
-                                                              RealType* values ) const
-{
-   IndexType elementPointer( 0 );
-   for( IndexType i = -1; i <= 1; i++ )
-   {
-      const IndexType column = row + 1;
-      if( column >= 0 && column < this->getColumns() )
+          typename Index,
+          bool RowMajorOrder >
+  template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+   const auto values_view = this->values.getConstView();
+   const auto indexer_ = this->indexer;
+   const auto rows = this->getRows();
+   const auto columns = this->getColumns();
+   const auto size = this->size;
+   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
+      //bool compute;
+      if( rowIdx == 0 )
       {
-         columns[ elementPointer ] = column;
-         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
-         elementPointer++;
+         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
+         function( 0, 1, rowIdx,     values_view[ i_0 ] );
+         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
+         return;
       }
-   }
+      if( rowIdx < size || columns > rows )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
+         return;
+      }
+      if( rows == columns )
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
+         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
+         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+      }
+      else
+      {
+         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
+         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
+      }
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
 {
-   if( std::is_same< Device, Devices::Host >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        1 );
-   if( std::is_same< Device, Devices::Cuda >::value )
-      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
-                        rowIndex,
-                        this->getColumns(),
-                        this->rows );
+   this->forRows( 0, this->getRows(), function );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-__cuda_callable__
-const typename Tridiagonal< Real, Device, Index >::MatrixRow
-Tridiagonal< Real, Device, Index >::
-getRow( const IndexType rowIndex ) const
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
 {
-   throw Exceptions::NotImplementedError();
+   this->forRows( 0, this->getRows(), function );
 }
 
-
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 template< typename Vector >
 __cuda_callable__
-typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
-                                                                                         const Vector& vector ) const
+typename Vector::RealType 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
-   return TridiagonalDeviceDependentCode< Device >::
-             rowVectorProduct( this->rows,
-                               this->values,
-                               row,
-                               vector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename InVector,
              typename OutVector >
-void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
-                                                                 OutVector& outVector ) const
+void 
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    TNL_ASSERT( this->getColumns() == inVector.getSize(),
             std::cerr << "Matrix columns: " << this->getColumns() << std::endl
@@ -443,16 +489,19 @@ void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector
                std::cerr << "Matrix rows: " << this->getRows() << std::endl
                     << "Vector size: " << outVector.getSize() << std::endl );
 
-   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                    const RealType& matrixMultiplicator,
-                                                    const RealType& thisMatrixMultiplicator )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
             std::cerr << "This matrix columns: " << this->getColumns() << std::endl
@@ -494,10 +543,13 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename Real2, typename Index2 >
-void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
-                                                                    const RealType& matrixMultiplicator )
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
+                  const RealType& matrixMultiplicator )
 {
    TNL_ASSERT( this->getRows() == matrix.getRows(),
                std::cerr << "This matrix rows: " << this->getRows() << std::endl
@@ -541,13 +593,16 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
    template< typename Vector1, typename Vector2 >
 __cuda_callable__
-void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
-                                                              const IndexType row,
-                                                              Vector2& x,
-                                                              const RealType& omega ) const
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+performSORIteration( const Vector1& b,
+                     const IndexType row,
+                     Vector2& x,
+                     const RealType& omega ) const
 {
    RealType sum( 0.0 );
    if( row > 0 )
@@ -561,9 +616,11 @@ void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
 // copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+          typename Index,
+          bool RowMajorOrder >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const TridiagonalMatrixView& matrix )
 {
    this->setLike( matrix );
    this->values = matrix.values;
@@ -573,14 +630,16 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
 // cross-device copy assignment
 template< typename Real,
           typename Device,
-          typename Index >
-   template< typename Real2, typename Device2, typename Index2, typename >
-Tridiagonal< Real, Device, Index >&
-Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >&
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+operator=( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix )
 {
    static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
                   "unknown device" );
-   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+   static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value,
                   "unknown device" );
 
    this->setLike( matrix );
@@ -591,42 +650,29 @@ Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( File& file ) const
-{
-   Matrix< Real, Device, Index >::save( file );
-   file << this->values;
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( File& file )
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const
 {
-   Matrix< Real, Device, Index >::load( file );
-   file >> this->values;
+   MatrixView< Real, Device, Index >::save( file );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
+          typename Index,
+          bool RowMajorOrder >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+save( const String& fileName ) const
 {
    Object::save( fileName );
 }
 
 template< typename Real,
           typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::load( const String& fileName )
-{
-   Object::load( fileName );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
+          typename Index,
+          bool RowMajorOrder >
+void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -640,120 +686,18 @@ void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
 
 template< typename Real,
           typename Device,
-          typename Index >
+          typename Index,
+          bool RowMajorOrder >
 __cuda_callable__
-Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
-                                                                    const IndexType column ) const
+Index TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getElementIndex( const IndexType row, const IndexType localIdx ) const
 {
-   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
-              std::cerr << " this->rows = " << this->rows
-                   << " row = " << row << " column = " << column );
-   TNL_ASSERT( abs( row - column ) < 2,
-              std::cerr << "row = " << row << " column = " << column << std::endl );
-   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+   TNL_ASSERT_GE( row, 0, "" );
+   TNL_ASSERT_LT( row, this->getRows(), "" );
+   TNL_ASSERT_GE( localIdx, 0, "" );
+   TNL_ASSERT_LT( localIdx, 3, "" );
+   return this->indexer.getGlobalIndex( row, localIdx );
 }
 
-template<>
-class TridiagonalDeviceDependentCode< Devices::Host >
-{
-   public:
-
-      typedef Devices::Host Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return 2*row + column;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType  >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ 1 ];
-         Index i = 3 * row;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i - 1 ] +
-                   vector[ row ] * values[ i ];
-         return vector[ row - 1 ] * values[ i - 1 ] +
-                vector[ row ] * values[ i ] +
-                vector[ row + 1 ] * values[ i + 1 ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-#ifdef HAVE_OPENMP
-#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
-#endif
-         for( Index row = 0; row < matrix.getRows(); row ++ )
-            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
-      }
-};
-
-template<>
-class TridiagonalDeviceDependentCode< Devices::Cuda >
-{
-   public:
- 
-      typedef Devices::Cuda Device;
-
-      template< typename Index >
-      __cuda_callable__
-      static Index getElementIndex( const Index rows,
-                                    const Index row,
-                                    const Index column )
-      {
-         return ( column - row + 1 )*rows + row - 1;
-      }
-
-      template< typename Vector,
-                typename Index,
-                typename ValuesType >
-      __cuda_callable__
-      static typename Vector::RealType rowVectorProduct( const Index rows,
-                                                         const ValuesType& values,
-                                                         const Index row,
-                                                         const Vector& vector )
-      {
-         if( row == 0 )
-            return vector[ 0 ] * values[ 0 ] +
-                   vector[ 1 ] * values[ rows - 1 ];
-         Index i = row - 1;
-         if( row == rows - 1 )
-            return vector[ row - 1 ] * values[ i ] +
-                   vector[ row ] * values[ i + rows ];
-         return vector[ row - 1 ] * values[ i ] +
-                vector[ row ] * values[ i + rows ] +
-                vector[ row + 1 ] * values[ i + 2*rows ];
-      }
-
-      template< typename Real,
-                typename Index,
-                typename InVector,
-                typename OutVector >
-      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-      {
-         MatrixVectorProductCuda( matrix, inVector, outVector );
-      }
-};
-
 } // namespace Matrices
 } // namespace TNL
diff --git a/src/TNL/Matrices/TridiagonalRow.h b/src/TNL/Matrices/TridiagonalRow.h
deleted file mode 100644
index 9d06b39e18..0000000000
--- a/src/TNL/Matrices/TridiagonalRow.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-class TridiagonalRow
-{
-   public:
-
-      __cuda_callable__
-      TridiagonalRow();
-
-      __cuda_callable__
-      TridiagonalRow( Real* values,
-                               const Index row,
-                               const Index columns,
-                               const Index step );
-
-      __cuda_callable__
-      void bind( Real* values,
-                 const Index row,
-                 const Index columns,
-                 const Index step );
-
-      __cuda_callable__
-      void setElement( const Index& elementIndex,
-                       const Index& column,
-                       const Real& value );
-
-   protected:
-
-      Real* values;
-
-      Index row, columns, step;
-};
-
-} // namespace Matrices
-} // namespace TNL
-
-#include <TNL/Matrices/TridiagonalRow_impl.h>
diff --git a/src/TNL/Matrices/TridiagonalRow_impl.h b/src/TNL/Matrices/TridiagonalRow_impl.h
deleted file mode 100644
index f5b7e842a4..0000000000
--- a/src/TNL/Matrices/TridiagonalRow_impl.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/***************************************************************************
-                          TridiagonalRow_impl.h  -  description
-                             -------------------
-    begin                : Dec 31, 2014
-    copyright            : (C) 2014 by oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#pragma once
-
-namespace TNL {
-namespace Matrices {   
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow()
-: values( 0 ),
-  row( 0 ),
-  columns( 0 ),
-  step( 0 )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-TridiagonalRow< Real, Index >::
-TridiagonalRow( Real* values,
-                         const Index row,
-                         const Index columns,
-                         const Index step )
-: values( values ),
-  row( row ),
-  columns( columns ),
-  step( step )
-{
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-bind( Real* values,
-      const Index row,
-      const Index columns,
-      const Index step )
-{
-   this->values = values;
-   this->row = row;
-   this->columns = columns;
-   this->step = step;
-}
-
-template< typename Real, typename Index >
-__cuda_callable__
-void
-TridiagonalRow< Real, Index >::
-setElement( const Index& elementIndex,
-            const Index& column,
-            const Real& value )
-{
-   TNL_ASSERT( this->values, );
-   TNL_ASSERT( this->step > 0,);
-   TNL_ASSERT( column >= 0 && column < this->columns,
-              std::cerr << "column = " << columns << " this->columns = " << this->columns );
-   TNL_ASSERT( abs( column - row ) <= 1,
-              std::cerr << "column = " << column << " row =  " << row );
-
-   /****
-    * this->values stores an adress of the diagonal element
-    */
-   this->values[ ( column - row ) * this->step ] = value;
-}
-
-} // namespace Matrices
-} // namespace TNL
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
new file mode 100644
index 0000000000..2f245c38f6
--- /dev/null
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -0,0 +1,90 @@
+/***************************************************************************
+                          TridiagonalMatrixIndexer.h  -  description
+                             -------------------
+    begin                : Jan 9, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+   namespace Matrices {
+      namespace details {
+
+template< typename Index,
+          bool RowMajorOrder >
+class TridiagonalMatrixIndexer
+{
+   public:
+
+      using IndexType = Index;
+
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer()
+      : rows( 0 ), columns( 0 ), size( 0 ){};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns )
+      : rows( rows ), columns( columns ), size( TNL::min( rows, columns ) ) {};
+
+      __cuda_callable__
+      TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer )
+      : rows( indexer.rows ), columns( indexer.columns ), size( indexer.size ) {};
+
+      void setDimensions( const IndexType& rows, const IndexType& columns )
+      {
+         this->rows = rows;
+         this->columns = columns;
+         this->size = min( rows, columns );
+      };
+
+      __cuda_callable__
+      IndexType getRowSize( const IndexType rowIdx ) const
+      {
+         if( rowIdx == 0 )
+            return 2;
+         if( columns <= rows )
+         {
+            if( rowIdx == columns - 1 )
+               return 2;
+            if( rowIdx == columns )
+               return 1;
+         }
+         return 3;
+      };
+
+      __cuda_callable__
+      IndexType getRows() const { return this->rows; };
+
+      __cuda_callable__
+      IndexType getColumns() const { return this->rows; };
+
+      __cuda_callable__
+      IndexType getStorageSize() const { return 3 * this->size; };
+
+      __cuda_callable__
+      IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const
+      {
+         TNL_ASSERT_GE( localIdx, 0, "" );
+         TNL_ASSERT_LT( localIdx, 3, "" );
+         TNL_ASSERT_GE( rowIdx, 0, "" );
+         TNL_ASSERT_LT( rowIdx, this->rows, "" );
+         
+         if( RowMajorOrder )
+            return 3 * rowIdx + localIdx;
+         else
+            return localIdx * size + rowIdx;
+      };
+
+      protected:
+
+         IndexType rows, columns, size;
+};
+      } //namespace details
+   } // namespace Materices
+} // namespace TNL
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index 40cecb2bde..962f8c82df 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -587,7 +587,7 @@ void test_SetRow()
          { 2, 3, 4, 5, 6 } };
       auto row = matrix_view.getRow( rowIdx );
       for( IndexType i = 0; i < 5; i++ )
-        row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] );
+        row.setElement( i, values[ rowIdx ][ i ] );
    };
    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f );
 
@@ -1172,7 +1172,7 @@ void test_AssignmentOperator()
    TridiagonalHost hostMatrix( rows, columns );
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j <= i; j++ )
-         hostMatrix( i, j ) = i + j;
+         hostMatrix.setElement( i, j,  i + j );
 
    Matrix matrix( rows, columns );
    matrix.getValues() = 0.0;
@@ -1369,7 +1369,7 @@ using MatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( MatrixTest, MatrixTypes );
 
-TYPED_TEST( Matrix, getSerializationType )
+TYPED_TEST( MatrixTest, getSerializationType )
 {
    test_GetSerializationType();
 }
-- 
GitLab