diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h
index d2827015695da2183c16c152e10d1d711149b51e..e7e3ab6b279ce53f9db662fc58efd8b53109f4b4 100644
--- a/src/TNL/Matrices/Tridiagonal.h
+++ b/src/TNL/Matrices/Tridiagonal.h
@@ -50,13 +50,15 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
                 typename _Index = Index >
       using Self = Tridiagonal< _Real, _Device, _Index >;
 
+      static constexpr bool getRowMajorOrder() { return RowMajorOrder; };
+
       Tridiagonal();
 
       Tridiagonal( const IndexType rows, const IndexType columns );
 
-      ViewType getView();
+      ViewType getView() const; // TODO: remove const
 
-      ConstViewType getConstView() const;
+      //ConstViewType getConstView() const;
 
       static String getSerializationType();
 
@@ -168,6 +170,10 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator >
 
       void print( std::ostream& str ) const;
 
+      const IndexerType& getIndexer() const;
+
+      IndexerType& getIndexer();
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp
index c6d359d3be42c618eaf28c33a0b320f5d0dee1f7..6c09238ff0628b06584007d4336631d4b28650f9 100644
--- a/src/TNL/Matrices/Tridiagonal.hpp
+++ b/src/TNL/Matrices/Tridiagonal.hpp
@@ -49,12 +49,13 @@ template< typename Real,
           typename RealAllocator >
 auto
 Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
-getView() -> ViewType
+getView() const -> ViewType
 {
-   return ViewType( this->values.getView(), indexer );
+   // TODO: fix when getConstView works
+   return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer );
 }
 
-template< typename Real,
+/*template< typename Real,
           typename Device,
           typename Index,
           bool RowMajorOrder,
@@ -64,7 +65,7 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getConstView() const -> ConstViewType
 {
    return ConstViewType( this->values.getConstView(), indexer );
-}
+}*/
 
 template< typename Real,
           typename Device,
@@ -146,19 +147,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getCompressedRowLengths( Vector& rowLengths ) const
 {
    return this->view.getCompressedRowLengths( rowLengths );
-   /*rowLengths.setSize( this->getRows() );
-   rowLengths = 0;
-   auto rowLengths_view = rowLengths.getView();
-   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType {
-      return ( value != 0.0 );
-   };
-   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
-      aux += a;
-   };
-   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
-      rowLengths_view[ rowIdx ] = value;
-   };
-   this->allRowsReduction( fetch, reduce, keep, 0 );*/
 }
 
 template< typename Real,
@@ -171,7 +159,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRowLength( const IndexType row ) const
 {
    return this->view.getRowLength( row );
-   //return this->indexer.getRowSize( row );
 }
 
 template< typename Real,
@@ -209,11 +196,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
    return this->view.getNumberOfNonzeroMatrixElements();
-   /*const auto values_view = this->values.getConstView();
-   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
-      return ( values_view[ i ] != 0.0 );
-   };
-   return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 );*/
 }
 
 template< typename Real,
@@ -283,7 +265,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) const -> const RowView
 {
    return this->view.getRow( rowIdx );
-   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -297,7 +278,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getRow( const IndexType& rowIdx ) -> RowView
 {
    return this->view.getRow( rowIdx );
-   //return RowView( this->values.getView(), this->indexer );
 }
 
 template< typename Real,
@@ -310,18 +290,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 setElement( const IndexType row, const IndexType column, const RealType& value )
 {
    return this->view.setElement( row, column, value );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
-   {
-      std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
-      throw std::logic_error( msg.str() );
-   }
-   this->values.setElement( this->getElementIndex( row, column ), value );
-   return true;*/
 }
 
 template< typename Real,
@@ -337,19 +305,6 @@ addElement( const IndexType row,
             const RealType& thisElementMultiplicator )
 {
    return this->view.addElement( row, column, value, thisElementMultiplicator );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-   if( abs( row - column ) > 1 )
-   {
-      std::stringstream msg;
-      msg << "Wrong matrix element coordinates ( "  << row << ", " << column << " ) in tridiagonal matrix.";
-      throw std::logic_error( msg.str() );
-   }
-   const Index i = this->getElementIndex( row, column );
-   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
-   return true;*/
 }
 
 template< typename Real,
@@ -362,14 +317,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 getElement( const IndexType row, const IndexType column ) const
 {
    return this->view.getElement( row, column );
-   /*TNL_ASSERT_GE( row, 0, "" );
-   TNL_ASSERT_LT( row, this->getRows(), "" );
-   TNL_ASSERT_GE( column, 0, "" );
-   TNL_ASSERT_LT( column, this->getColumns(), "" );
-
-   if( abs( column - row ) > 1 )
-      return 0.0;
-   return this->values.getElement( this->getElementIndex( row, column ) );*/
 }
 
 template< typename Real,
@@ -383,39 +330,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
    this->view.rowsReduction( first, last, fetch, reduce, keep, zero );
-   /*using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) );
-   const auto values_view = this->values.getConstView();
-   const auto indexer = this->indexer;
-   const auto zero = zero_;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      Real_ sum( zero );
-      if( rowIdx == 0 )
-      {
-         reduce( sum, fetch( 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) );
-         reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) );
-         keep( 0, sum );
-         return;
-      }
-      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
-      {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) );
-         keep( rowIdx, sum );
-         return;
-      }
-      if( indexer.getRows() == indexer.getColumns() )
-      {
-         reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-         reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
-         keep( rowIdx, sum );
-      }
-      else
-      {
-         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -442,45 +356,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    this->view.forRows( first, last, function );
-   /*const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
-      if( rowIdx == 0 )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
-      }
-      if( rows == columns )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-      }
-      else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -494,45 +369,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 forRows( IndexType first, IndexType last, Function& function )
 {
    this->view.forRows( first, last, function );
-   /*const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
-   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
-      if( rowIdx == 0 )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
-      }
-      if( rows == columns )
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-      }
-      else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
-   };
-   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );*/
 }
 
 template< typename Real,
@@ -573,11 +409,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 rowVectorProduct( const IndexType row, const Vector& vector ) const
 {
    return this->view.rowVectorProduct();
-   /*return TridiagonalDeviceDependentCode< Device >::
-             rowVectorProduct( this->rows,
-                               this->values,
-                               row,
-                               vector );*/
 }
 
 template< typename Real,
@@ -592,14 +423,6 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
    this->view.vectorProduct( inVector, outVector );
-   /*TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );*/
-
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
 }
 
 template< typename Real,
@@ -614,14 +437,7 @@ addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
-                 << "This matrix rows: " << this->getRows() << std::endl );
-
-   if( thisMatrixMultiplicator == 1.0 )
-      this->values += matrixMultiplicator * matrix.values;
-   else
-      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+   this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
 }
 
 #ifdef HAVE_CUDA
@@ -753,11 +569,31 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca
                   "unknown device" );
 
    this->setLike( matrix );
-
-   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
+   if( RowMajorOrder == RowMajorOrder_ )
+      this->values = matrix.getValues();
+   else
+   {
+      if( std::is_same< Device, Device_ >::value )
+      {
+         const auto matrix_view = matrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+      else
+      {
+         Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix;
+         auxMatrix = matrix;
+         const auto matrix_view = auxMatrix.getView();
+         auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+            value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+         };
+         this->forAllRows( f );
+      }
+   }
 }
 
-
 template< typename Real,
           typename Device,
           typename Index,
@@ -777,6 +613,7 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil
 {
    Matrix< Real, Device, Index >::load( file );
    this->indexer.setDimensions( this->getRows(), this->getColumns() );
+   this->view = this->getView();
 }
 
 template< typename Real,
@@ -804,7 +641,9 @@ template< typename Real,
           typename Index,
           bool RowMajorOrder,
           typename RealAllocator >
-void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const
+void
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+print( std::ostream& str ) const
 {
    for( IndexType row = 0; row < this->getRows(); row++ )
    {
@@ -816,6 +655,30 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::print( st
    }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+auto
+Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
index 78593acf511153ab4051be71f5895210cdafb93a..29006279390150cbd80a469c8ffed124ca67be17 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.h
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -146,6 +146,12 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
 
       void print( std::ostream& str ) const;
 
+      __cuda_callable__
+      const IndexerType& getIndexer() const;
+
+      __cuda_callable__
+      IndexerType& getIndexer();
+
    protected:
 
       __cuda_callable__
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
index 83ff6035dd36b628f02e0ac4d324b890dfb05b82..4d4950c4e92be8568219f0102371b0d9e86b0979 100644
--- a/src/TNL/Matrices/TridiagonalMatrixView.hpp
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -297,7 +297,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
          keep( 0, sum );
          return;
       }
-      if( rowIdx < indexer.getSize() || indexer.getColumns() > indexer.getRows() )
+      if( rowIdx + 1 < indexer.getColumns() )
       {
          reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
          reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
@@ -305,7 +305,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
          keep( rowIdx, sum );
          return;
       }
-      if( indexer.getRows() == indexer.getColumns() )
+      if( rowIdx < indexer.getColumns() )
       {
          reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
          reduce( sum, fetch( rowIdx, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) );
@@ -313,7 +313,7 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke
       }
       else
       {
-         keep( rowIdx, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
+         keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) );
       }
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
@@ -328,7 +328,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
 {
-   this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+   this->rowsReduction( 0, this->indexer.getNonEmptyRowsCount(), fetch, reduce, keep, zero );
 }
 
 template< typename Real,
@@ -341,42 +341,26 @@ TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function ) const
 {
    const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
       }
-      if( rows == columns )
+      else if( rowIdx < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
       }
       else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -390,43 +374,27 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forRows( IndexType first, IndexType last, Function& function )
 {
-   const auto values_view = this->values.getConstView();
-   const auto indexer_ = this->indexer;
-   const auto rows = this->getRows();
-   const auto columns = this->getColumns();
-   const auto size = this->size;
+   auto values_view = this->values.getView();
+   const auto indexer = this->indexer;
    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
-      //bool compute;
       if( rowIdx == 0 )
       {
-         IndexType i_0 = indexer.getGlobalIndex( 0, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( 0, 1 );
-         function( 0, 1, rowIdx,     values_view[ i_0 ] );
-         function( 0, 2, rowIdx + 1, values_view[ i_1 ] );
-         return;
-      }
-      if( rowIdx < size || columns > rows )
+         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
+         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
+      } 
+      else if( rowIdx + 1 < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         IndexType i_2 = indexer.getGlobalIndex( rowIdx, 2 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
-         function( rowIdx, 2, rowIdx + 1, values_view[ i_2 ] );
-         return;
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
       }
-      if( rows == columns )
+      else if( rowIdx < indexer.getColumns() )
       {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         IndexType i_1 = indexer.getGlobalIndex( rowIdx, 1 );
-         function( rowIdx, 0, rowIdx - 1, values_view[ i_0 ] );
-         function( rowIdx, 1, rowIdx,     values_view[ i_1 ] );
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
       }
       else
-      {
-         IndexType i_0 = indexer.getGlobalIndex( rowIdx, 0 );
-         function( rowIdx, 0, rowIdx, values_view[ i_0 ] );
-      }
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
    };
    Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
 }
@@ -440,7 +408,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function ) const
 {
-   this->forRows( 0, this->getRows(), function );
+   this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function );
 }
 
 template< typename Real,
@@ -452,7 +420,7 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 forAllRows( Function& function )
 {
-   this->forRows( 0, this->getRows(), function );
+   this->forRows( 0, this->indexer.getNonEmptyRowsCount(), function );
 }
 
 template< typename Real,
@@ -477,14 +445,22 @@ void
 TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
 vectorProduct( const InVector& inVector, OutVector& outVector ) const
 {
-   TNL_ASSERT( this->getColumns() == inVector.getSize(),
-            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
-                 << "Vector size: " << inVector.getSize() << std::endl );
-   TNL_ASSERT( this->getRows() == outVector.getSize(),
-               std::cerr << "Matrix rows: " << this->getRows() << std::endl
-                    << "Vector size: " << outVector.getSize() << std::endl );
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
 
-   //DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   const auto valuesView = this->values.getConstView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
 }
 
 template< typename Real,
@@ -498,18 +474,41 @@ addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >
            const RealType& matrixMultiplicator,
            const RealType& thisMatrixMultiplicator )
 {
-   TNL_ASSERT( this->getRows() == matrix.getRows(),
-            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
-                 << "This matrix rows: " << this->getRows() << std::endl );
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." );
 
-   if( thisMatrixMultiplicator == 1.0 )
-      this->values += matrixMultiplicator * matrix.values;
+   if( RowMajorOrder == RowMajorOrder_ )
+   {
+      if( thisMatrixMultiplicator == 1.0 )
+         this->values += matrixMultiplicator * matrix.getValues();
+      else
+         this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues();
+   }
    else
-      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+   {
+      const auto matrix_view = matrix;
+      const auto matrixMult = matrixMultiplicator;
+      const auto thisMult = thisMatrixMultiplicator;
+      auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable {
+         value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ];
+      };
+      if( thisMult == 0.0 )
+         this->forAllRows( add0 );
+      else if( thisMult == 1.0 )
+         this->forAllRows( add1 );
+      else
+         this->forAllRows( addGen );
+   }
 }
 
 #ifdef HAVE_CUDA
-template< typename Real,
+/*template< typename Real,
           typename Real2,
           typename Index,
           typename Index2 >
@@ -533,7 +532,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De
                                     rowIdx,
                                     matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
    }
-}
+}*/
 #endif
 
 template< typename Real,
@@ -563,7 +562,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
    if( std::is_same< Device, Devices::Cuda >::value )
    {
 #ifdef HAVE_CUDA
-      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      /*Tridiagonal* kernel_this = Cuda::passToDevice( *this );
       typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
       InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
       dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
@@ -581,7 +580,7 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix,
       }
       Cuda::freeFromDevice( kernel_this );
       Cuda::freeFromDevice( kernel_inMatrix );
-      TNL_CHECK_CUDA_DEVICE;
+      TNL_CHECK_CUDA_DEVICE;*/
 #endif
    }
 }
@@ -644,6 +643,30 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os
    }
 }
 
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() const -> const IndexerType&
+{
+   return this->indexer;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+__cuda_callable__
+auto
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+getIndexer() -> IndexerType&
+{
+   return this->indexer;
+}
+
 template< typename Real,
           typename Device,
           typename Index,
diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
index d9fdd0c23cc5bc93ea58b62f31130b9a3674adff..6d3377b4f938399452230d3e5a3739d26e4e755f 100644
--- a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
+++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h
@@ -65,7 +65,7 @@ class TridiagonalMatrixIndexer
       const IndexType& getColumns() const { return this->columns; };
 
       __cuda_callable__
-      const IndexType& getSize() const { return this->nonEmptyRows; };
+      const IndexType& getNonEmptyRowsCount() const { return this->nonEmptyRows; };
       __cuda_callable__
       IndexType getStorageSize() const { return 3 * this->nonEmptyRows; };
 
diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt
index 28749540556121c826e5de3457ddad9f3996201f..4b95380c4477b33184e6c80946d9f0aebd95e04c 100644
--- a/src/UnitTests/Matrices/CMakeLists.txt
+++ b/src/UnitTests/Matrices/CMakeLists.txt
@@ -13,8 +13,8 @@ IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+#   CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} )
@@ -42,9 +42,9 @@ ELSE(  BUILD_CUDA )
    TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
    TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
-   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
-   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
-   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
+#   ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp )
+#   TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} )
+#   TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} )
 
    ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cpp )
    TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} )
@@ -65,7 +65,7 @@ ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${C
 ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
+#ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} )
diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
index dcd14302a446b32171574bf805921ba4b63d322a..2c476670b7ecffd7c985475d5f154acefcc2d4d8 100644
--- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h
+++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h
@@ -8,6 +8,7 @@
 
 /* See Copyright Notice in tnl/Copyright */
 
+#include <sstream>
 #include <TNL/Devices/Host.h>
 #include <TNL/Matrices/Matrix.h>
 #include <TNL/Matrices/Tridiagonal.h>
@@ -774,8 +775,11 @@ void test_VectorProduct()
    RealType value = 1;
    for( IndexType i = 0; i < rows; i++ )
       for( IndexType j = 0; j < cols; j++)
+      {
          if( abs( i - j ) <= 1 )
-            m.setElement( i, j, value++ );
+            m.setElement( i, j, value );
+         value++;
+      }
 
    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
@@ -787,7 +791,6 @@ void test_VectorProduct()
 
    m.vectorProduct( inVector, outVector);
 
-   std::cerr << outVector << std::endl;
    EXPECT_EQ( outVector.getElement( 0 ),  6 );
    EXPECT_EQ( outVector.getElement( 1 ), 36 );
    EXPECT_EQ( outVector.getElement( 2 ), 66 );
@@ -795,122 +798,123 @@ void test_VectorProduct()
    EXPECT_EQ( outVector.getElement( 4 ), 40 );
 }
 
-template< typename Matrix >
+template< typename Matrix1, typename Matrix2 = Matrix1 >
 void test_AddMatrix()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix1::RealType;
+   using DeviceType = typename Matrix1::DeviceType;
+   using IndexType = typename Matrix1::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix1 m( rows, cols );
 
-/*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    Matrix m2;
-    m2.reset();
-    m2.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  3  4  5  0 |
+    *    |  0  6  7  8 |
+    *    |  0  0  9 10 |
+    *    \  0  0  0 11 /
+    */
+   Matrix2 m2( rows, cols );
 
-    RealType newValue = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++)
+   RealType newValue = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++)
+         if( abs( i - j ) <= 1 )
             m2.setElement( i, j, newValue++ );
 
-    /*
- * Sets up the following 5x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
+   /*
+    * Compute the following 5x4 dense matrix:
+    *
+    *  /  1  2  0  0 \       /  1  2  0  0 \    /  3  6  0  0 \
+    *  |  5  6  7  0 |       |  3  4  5  0 |    | 11 14 17  0 |
+    *  |  0 10 11 12 | + 2 * |  0  6  7  8 | =  |  0 22 25 28 |
+    *  |  0  0 15 16 |       |  0  0  9 10 |    |  0  0 33 36 |
+    *  \  0  0  0 20 /       \  0  0  0 11 /    \  0  0  0 42 /
+    */
 
-    Matrix mResult;
-    mResult.reset();
-    mResult.setDimensions( rows, cols );
-
-    mResult = m;
-
-    RealType matrixMultiplicator = 2;
-    RealType thisMatrixMultiplicator = 1;
-
-    mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
-    EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
-    EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
-
-    EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
-    EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
-    EXPECT_EQ( mResult.getElement( 0, 2 ),  9 );
-    EXPECT_EQ( mResult.getElement( 0, 3 ), 12 );
-
-    EXPECT_EQ( mResult.getElement( 1, 0 ), 15 );
-    EXPECT_EQ( mResult.getElement( 1, 1 ), 18 );
-    EXPECT_EQ( mResult.getElement( 1, 2 ), 21 );
-    EXPECT_EQ( mResult.getElement( 1, 3 ), 24 );
-
-    EXPECT_EQ( mResult.getElement( 2, 0 ), 27 );
-    EXPECT_EQ( mResult.getElement( 2, 1 ), 30 );
-    EXPECT_EQ( mResult.getElement( 2, 2 ), 33 );
-    EXPECT_EQ( mResult.getElement( 2, 3 ), 36 );
-
-    EXPECT_EQ( mResult.getElement( 3, 0 ), 39 );
-    EXPECT_EQ( mResult.getElement( 3, 1 ), 42 );
-    EXPECT_EQ( mResult.getElement( 3, 2 ), 45 );
-    EXPECT_EQ( mResult.getElement( 3, 3 ), 48 );
-
-    EXPECT_EQ( mResult.getElement( 4, 0 ), 51 );
-    EXPECT_EQ( mResult.getElement( 4, 1 ), 54 );
-    EXPECT_EQ( mResult.getElement( 4, 2 ), 57 );
-    EXPECT_EQ( mResult.getElement( 4, 3 ), 60 );
+   Matrix1 mResult;
+   mResult.reset();
+   mResult.setDimensions( rows, cols );
+
+   mResult = m;
+
+   RealType matrixMultiplicator = 2;
+   RealType thisMatrixMultiplicator = 1;
+
+   mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) );
+   EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) );
+   EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) );
+   EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) );
+   EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) );
+   EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) );
+   EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) );
+   EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) );
+
+   EXPECT_EQ( mResult.getElement( 0, 0 ),  3 );
+   EXPECT_EQ( mResult.getElement( 0, 1 ),  6 );
+   EXPECT_EQ( mResult.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 1, 0 ), 11 );
+   EXPECT_EQ( mResult.getElement( 1, 1 ), 14 );
+   EXPECT_EQ( mResult.getElement( 1, 2 ), 17 );
+   EXPECT_EQ( mResult.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( mResult.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 2, 1 ), 22 );
+   EXPECT_EQ( mResult.getElement( 2, 2 ), 25 );
+   EXPECT_EQ( mResult.getElement( 2, 3 ), 28 );
+
+   EXPECT_EQ( mResult.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 3, 2 ), 33 );
+   EXPECT_EQ( mResult.getElement( 3, 3 ), 36 );
+
+   EXPECT_EQ( mResult.getElement( 4, 0 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 1 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 2 ),  0 );
+   EXPECT_EQ( mResult.getElement( 4, 3 ), 42 );
 }
 
 template< typename Matrix >
@@ -1162,43 +1166,44 @@ void test_AssignmentOperator()
    using RealType = typename Matrix::RealType;
    using DeviceType = typename Matrix::DeviceType;
    using IndexType = typename Matrix::IndexType;
+   constexpr bool rowMajorOrder = Matrix::getRowMajorOrder();
 
-   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >;
-   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >;
+   using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >;
+   using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >;
 
    const IndexType rows( 10 ), columns( 10 );
    TridiagonalHost hostMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         hostMatrix.setElement( i, j,  i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j <  columns; j++ )
+         if( abs( i - j ) <= 1 )
+            hostMatrix.setElement( i, j,  i + j );
 
    Matrix matrix( rows, columns );
    matrix.getValues() = 0.0;
    matrix = hostMatrix;
    for( IndexType i = 0; i < columns; i++ )
       for( IndexType j = 0; j < rows; j++ )
-      {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
-            EXPECT_EQ( matrix.getElement( i, j ), i + j );
-      }
+            if( abs( i - j ) <= 1 )
+               EXPECT_EQ( matrix.getElement( i, j ), i + j );
+            else
+               EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
 
 #ifdef HAVE_CUDA
    TridiagonalCuda cudaMatrix( rows, columns );
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j <= i; j++ )
-         cudaMatrix.setElement( i, j, i + j );
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
+         if( abs( i - j ) <= 1 )
+            cudaMatrix.setElement( i, j, i + j );
 
    matrix.getValues() = 0.0;
    matrix = cudaMatrix;
-   for( IndexType i = 0; i < columns; i++ )
-      for( IndexType j = 0; j < rows; j++ )
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < columns; j++ )
       {
-         if( j > i )
-            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
-         else
+         if( abs( i - j ) <= 1 )
             EXPECT_EQ( matrix.getElement( i, j ), i + j );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
       }
 #endif
 }
@@ -1207,123 +1212,125 @@ void test_AssignmentOperator()
 template< typename Matrix >
 void test_SaveAndLoad()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 4x4 dense matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    \ 13 14 15 16 /
- */
-    const IndexType rows = 4;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix savedMatrix;
-    savedMatrix.reset();
-    savedMatrix.setDimensions( rows, cols );
+   /*
+    * Sets up the following 4x4 dense matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    \  0  0 15 16 /
+    */
+   const IndexType rows = 4;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++ )
-        for( IndexType j = 0; j < cols; j++ )
-            savedMatrix.setElement( i, j, value++ );
-
-    ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
-
-    Matrix loadedMatrix;
-    loadedMatrix.reset();
-    loadedMatrix.setDimensions( rows, cols );
-
-    ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
-
-    EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  3 );
-    EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  4 );
-
-    EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
-    EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  8 );
-
-    EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  9 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
-    EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
-
-    EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
-    EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
+   Matrix savedMatrix( rows, cols );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++ )
+      for( IndexType j = 0; j < cols; j++ )
+      {
+         if( abs( i - j ) <= 1 )
+            savedMatrix.setElement( i, j, value );
+         value++;
+      }
+
+   ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ),  1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ),  2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ),  5 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ),  6 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ),  7 );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ),  0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ),  0 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 );
 }
 
 template< typename Matrix >
 void test_Print()
 {
-    using RealType = typename Matrix::RealType;
-    using DeviceType = typename Matrix::DeviceType;
-    using IndexType = typename Matrix::IndexType;
-/*
- * Sets up the following 5x4 sparse matrix:
- *
- *    /  1  2  3  4 \
- *    |  5  6  7  8 |
- *    |  9 10 11 12 |
- *    | 13 14 15 16 |
- *    \ 17 18 19 20 /
- */
-    const IndexType rows = 5;
-    const IndexType cols = 4;
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
 
-    Matrix m;
-    m.reset();
-    m.setDimensions( rows, cols );
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  0  0 \
+    *    |  5  6  7  0 |
+    *    |  0 10 11 12 |
+    *    |  0  0 15 16 |
+    *    \  0  0  0 20 /
+    */
+   const IndexType rows = 5;
+   const IndexType cols = 4;
 
-    RealType value = 1;
-    for( IndexType i = 0; i < rows; i++)
-        for( IndexType j = 0; j < cols; j++)
-            m.setElement( i, j, value++ );
+   Matrix m( rows, cols );
 
-    #include <sstream>
-    std::stringstream printed;
-    std::stringstream couted;
+   RealType value = 1;
+   for( IndexType i = 0; i < rows; i++)
+      for( IndexType j = 0; j < cols; j++)
+      {
+         if( abs( i - j ) <= 1 )
+            m.setElement( i, j, value );
+         value++;
+      }
 
-    //change the underlying buffer and save the old buffer
-    auto old_buf = std::cout.rdbuf(printed.rdbuf());
+   std::stringstream printed;
+   std::stringstream couted;
 
-    m.print( std::cout ); //all the std::cout goes to ss
+   //change the underlying buffer and save the old buffer
+   auto old_buf = std::cout.rdbuf(printed.rdbuf());
 
-    std::cout.rdbuf(old_buf); //reset
+   m.print( std::cout ); //all the std::cout goes to ss
 
-    couted << "Row: 0 ->  Col:0->1	 Col:1->2	 Col:2->3	 Col:3->4\t\n"
-              "Row: 1 ->  Col:0->5	 Col:1->6	 Col:2->7	 Col:3->8\t\n"
-              "Row: 2 ->  Col:0->9	 Col:1->10	 Col:2->11	 Col:3->12\t\n"
-              "Row: 3 ->  Col:0->13	 Col:1->14	 Col:2->15	 Col:3->16\t\n"
-              "Row: 4 ->  Col:0->17	 Col:1->18	 Col:2->19	 Col:3->20\t\n";
+   std::cout.rdbuf(old_buf); //reset
+   couted << "Row: 0 ->  Col:0->1\t Col:1->2\t\n"
+             "Row: 1 ->  Col:0->5\t Col:1->6\t Col:2->7\t\n"
+             "Row: 2 ->  Col:1->10\t Col:2->11\t Col:3->12\t\n"
+             "Row: 3 ->  Col:2->15\t Col:3->16\t\n"
+             "Row: 4 ->  Col:3->20\t\n";
 
-    EXPECT_EQ( printed.str(), couted.str() );
+   EXPECT_EQ( printed.str(), couted.str() );
 }
 
 // test fixture for typed tests
@@ -1470,6 +1477,19 @@ TYPED_TEST( MatrixTest, addMatrixTest )
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    using RealType = typename MatrixType::RealType;
+    using DeviceType = typename MatrixType::DeviceType;
+    using IndexType = typename MatrixType::IndexType;
+    using RealAllocatorType = typename MatrixType::RealAllocatorType;
+    using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >;
+
+    test_AddMatrix< MatrixType, MatrixType2 >();
+}
+
 TYPED_TEST( MatrixTest, assignmentOperatorTest )
 {
     using MatrixType = typename TestFixture::MatrixType;