From 72972cfed69a49860bcd71d94fbe34a1dcefde33 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Wed, 8 Jan 2020 18:27:50 +0100
Subject: [PATCH] Added TridiagonalMatrixView.

---
 src/TNL/Matrices/TridiagonalMatrixView.h   | 209 ++++++
 src/TNL/Matrices/TridiagonalMatrixView.hpp | 759 +++++++++++++++++++++
 2 files changed, 968 insertions(+)
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.h
 create mode 100644 src/TNL/Matrices/TridiagonalMatrixView.hpp

diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h
new file mode 100644
index 0000000000..3f57fe1c3e
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.h
@@ -0,0 +1,209 @@
+/***************************************************************************
+                          Tridiagonal.h  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Matrices/Matrix.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/TridiagonalRow.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Device >
+class TridiagonalDeviceDependentCode;
+
+template< typename Real = double,
+          typename Device = Devices::Host,
+          typename Index = int >
+class Tridiagonal : public Matrix< Real, Device, Index >
+{
+private:
+   // convenient template alias for controlling the selection of copy-assignment operator
+   template< typename Device2 >
+   using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
+
+   // friend class will be needed for templated assignment operators
+   template< typename Real2, typename Device2, typename Index2 >
+   friend class Tridiagonal;
+
+public:
+   typedef Real RealType;
+   typedef Device DeviceType;
+   typedef Index IndexType;
+   typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector;
+   typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
+   typedef Matrix< Real, Device, Index > BaseType;
+   typedef TridiagonalRow< Real, Index > MatrixRow;
+
+   template< typename _Real = Real,
+             typename _Device = Device,
+             typename _Index = Index >
+   using Self = Tridiagonal< _Real, _Device, _Index >;
+
+   Tridiagonal();
+
+   static String getSerializationType();
+
+   virtual String getSerializationTypeVirtual() const;
+
+   void setDimensions( const IndexType rows,
+                       const IndexType columns );
+
+   void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
+
+   IndexType getRowLength( const IndexType row ) const;
+
+   __cuda_callable__
+   IndexType getRowLengthFast( const IndexType row ) const;
+
+   IndexType getMaxRowLength() const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   void setLike( const Tridiagonal< Real2, Device2, Index2 >& m );
+
+   IndexType getNumberOfMatrixElements() const;
+
+   IndexType getNumberOfNonzeroMatrixElements() const;
+
+   IndexType getMaxRowlength() const;
+
+   void reset();
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   template< typename Real2, typename Device2, typename Index2 >
+   bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const;
+
+   void setValue( const RealType& v );
+
+   __cuda_callable__
+   bool setElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value );
+
+   bool setElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value );
+
+   __cuda_callable__
+   bool addElementFast( const IndexType row,
+                        const IndexType column,
+                        const RealType& value,
+                        const RealType& thisElementMultiplicator = 1.0 );
+
+   bool addElement( const IndexType row,
+                    const IndexType column,
+                    const RealType& value,
+                    const RealType& thisElementMultiplicator = 1.0 );
+
+   __cuda_callable__
+   bool setRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType elements );
+
+   bool setRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType elements );
+
+   __cuda_callable__
+   bool addRowFast( const IndexType row,
+                    const IndexType* columns,
+                    const RealType* values,
+                    const IndexType elements,
+                    const RealType& thisRowMultiplicator = 1.0 );
+
+   bool addRow( const IndexType row,
+                const IndexType* columns,
+                const RealType* values,
+                const IndexType elements,
+                const RealType& thisRowMultiplicator = 1.0 );
+
+   __cuda_callable__
+   RealType getElementFast( const IndexType row,
+                            const IndexType column ) const;
+
+   RealType getElement( const IndexType row,
+                        const IndexType column ) const;
+
+   __cuda_callable__
+   void getRowFast( const IndexType row,
+                    IndexType* columns,
+                    RealType* values ) const;
+
+   __cuda_callable__
+   MatrixRow getRow( const IndexType rowIndex );
+
+   __cuda_callable__
+   const MatrixRow getRow( const IndexType rowIndex ) const;
+
+   template< typename Vector >
+   __cuda_callable__
+   typename Vector::RealType rowVectorProduct( const IndexType row,
+                                               const Vector& vector ) const;
+
+   template< typename InVector,
+             typename OutVector >
+   void vectorProduct( const InVector& inVector,
+                       OutVector& outVector ) const;
+
+   template< typename Real2, typename Index2 >
+   void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                   const RealType& matrixMultiplicator = 1.0,
+                   const RealType& thisMatrixMultiplicator = 1.0 );
+
+   template< typename Real2, typename Index2 >
+   void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                          const RealType& matrixMultiplicator = 1.0 );
+
+   template< typename Vector1, typename Vector2 >
+   __cuda_callable__
+   void performSORIteration( const Vector1& b,
+                             const IndexType row,
+                             Vector2& x,
+                             const RealType& omega = 1.0 ) const;
+
+   // copy assignment
+   Tridiagonal& operator=( const Tridiagonal& matrix );
+
+   // cross-device copy assignment
+   template< typename Real2, typename Device2, typename Index2,
+             typename = typename Enabler< Device2 >::type >
+   Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix );
+
+   void save( File& file ) const;
+
+   void load( File& file );
+
+   void save( const String& fileName ) const;
+
+   void load( const String& fileName );
+
+   void print( std::ostream& str ) const;
+
+protected:
+
+   __cuda_callable__
+   IndexType getElementIndex( const IndexType row,
+                              const IndexType column ) const;
+
+   Containers::Vector< RealType, DeviceType, IndexType > values;
+
+   typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode;
+   friend class TridiagonalDeviceDependentCode< DeviceType >;
+};
+
+} // namespace Matrices
+} // namespace TNL
+
+#include <TNL/Matrices/Tridiagonal_impl.h>
diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp
new file mode 100644
index 0000000000..2752f68503
--- /dev/null
+++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp
@@ -0,0 +1,759 @@
+/***************************************************************************
+                          Tridiagonal_impl.h  -  description
+                             -------------------
+    begin                : Nov 30, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Assert.h>
+#include <TNL/Matrices/Tridiagonal.h>
+#include <TNL/Exceptions/NotImplementedError.h>
+
+namespace TNL {
+namespace Matrices {   
+
+template< typename Device >
+class TridiagonalDeviceDependentCode;
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Tridiagonal< Real, Device, Index >::Tridiagonal()
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          String( TNL::getType< Real >() ) +
+          String( ", " ) +
+          String( Device :: getDeviceType() ) +
+          String( ", " ) +
+          String( TNL::getType< Index >() ) +
+          String( " >" );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getTypeVirtual() const
+{
+   return this->getType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getSerializationType()
+{
+   return String( "Matrices::Tridiagonal< " ) +
+          getType< RealType >() + ", " +
+          getType< Device >() + ", " +
+          getType< IndexType >() + " >";
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const
+{
+   return this->getSerializationType();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows,
+                                                        const IndexType columns )
+{
+   Matrix< Real, Device, Index >::setDimensions( rows, columns );
+   values.setSize( 3*min( rows, columns ) );
+   this->values.setValue( 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
+{
+   if( rowLengths[ 0 ] > 2 )
+      throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   for( Index i = 1; i < diagonalLength-1; i++ )
+      if( rowLengths[ i ] > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() > this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 1 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() == this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 2 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+   if( this->getRows() < this->getColumns() )
+      if( rowLengths[ this->getRows()-1 ] > 3 )
+         throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const
+{
+   return this->getRowLengthFast( row );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
+{
+   const IndexType diagonalLength = min( this->getRows(), this->getColumns() );
+   if( row == 0 )
+      return 2;
+   if( row > 0 && row < diagonalLength - 1 )
+      return 3;
+   if( this->getRows() > this->getColumns() )
+      return 1;
+   if( this->getRows() == this->getColumns() )
+      return 2;
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const
+{
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m )
+{
+   this->setDimensions( m.getRows(), m.getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const
+{
+   return 3 * min( this->getRows(), this->getColumns() );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const
+{
+   IndexType nonzeroElements = 0;
+   for( IndexType i = 0; i < this->values.getSize(); i++ )
+      if( this->values.getElement( i ) != 0 )
+         nonzeroElements++;
+   return nonzeroElements;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Index
+Tridiagonal< Real, Device, Index >::
+getMaxRowlength() const
+{
+   return 3;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::reset()
+{
+   Matrix< Real, Device, Index >::reset();
+   this->values.reset();
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+{
+   return this->values == matrix.values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2 >
+bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const
+{
+   return this->values != matrix.values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::setValue( const RealType& v )
+{
+   this->values.setValue( v );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row,
+                                                                  const IndexType column,
+                                                                  const RealType& value )
+{
+   this->values[ this->getElementIndex( row, column ) ] = value;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row,
+                                                              const IndexType column,
+                                                              const RealType& value )
+{
+   this->values.setElement( this->getElementIndex( row, column ), value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row,
+                                                                  const IndexType column,
+                                                                  const RealType& value,
+                                                                  const RealType& thisElementMultiplicator )
+{
+   const Index i = this->getElementIndex( row, column );
+   this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value;
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row,
+                                                              const IndexType column,
+                                                              const RealType& value,
+                                                              const RealType& thisElementMultiplicator )
+{
+   const Index i = this->getElementIndex( row, column );
+   this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value );
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row,
+                                                              const IndexType* columns,
+                                                              const RealType* values,
+                                                              const IndexType elements )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   return this->addRowFast( row, columns, values, elements, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row,
+                                                          const IndexType* columns,
+                                                          const RealType* values,
+                                                          const IndexType elements )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   return this->addRow( row, columns, values, elements, 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row,
+                                                              const IndexType* columns,
+                                                              const RealType* values,
+                                                              const IndexType elements,
+                                                              const RealType& thisRowMultiplicator )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   if( elements > 3 )
+      return false;
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType& column = columns[ i ];
+      if( column < row - 1 || column > row + 1 )
+         return false;
+      addElementFast( row, column, values[ i ], thisRowMultiplicator );
+   }
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row,
+                                                          const IndexType* columns,
+                                                          const RealType* values,
+                                                          const IndexType elements,
+                                                          const RealType& thisRowMultiplicator )
+{
+   TNL_ASSERT( elements <= this->columns,
+            std::cerr << " elements = " << elements
+                 << " this->columns = " << this->columns );
+   if( elements > 3 )
+      return false;
+   for( IndexType i = 0; i < elements; i++ )
+   {
+      const IndexType column = columns[ i ];
+      if( column < row - 1 || column > row + 1 )
+         return false;
+      addElement( row, column, values[ i ], thisRowMultiplicator );
+   }
+   return true;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row,
+                                                                  const IndexType column ) const
+{
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values[ this->getElementIndex( row, column ) ];
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row,
+                                                              const IndexType column ) const
+{
+   if( abs( column - row ) > 1 )
+      return 0.0;
+   return this->values.getElement( this->getElementIndex( row, column ) );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row,
+                                                              IndexType* columns,
+                                                              RealType* values ) const
+{
+   IndexType elementPointer( 0 );
+   for( IndexType i = -1; i <= 1; i++ )
+   {
+      const IndexType column = row + 1;
+      if( column >= 0 && column < this->getColumns() )
+      {
+         columns[ elementPointer ] = column;
+         values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ];
+         elementPointer++;
+      }
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+typename Tridiagonal< Real, Device, Index >::MatrixRow
+Tridiagonal< Real, Device, Index >::
+getRow( const IndexType rowIndex )
+{
+   if( std::is_same< Device, Devices::Host >::value )
+      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
+                        rowIndex,
+                        this->getColumns(),
+                        1 );
+   if( std::is_same< Device, Devices::Cuda >::value )
+      return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ],
+                        rowIndex,
+                        this->getColumns(),
+                        this->rows );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+const typename Tridiagonal< Real, Device, Index >::MatrixRow
+Tridiagonal< Real, Device, Index >::
+getRow( const IndexType rowIndex ) const
+{
+   throw Exceptions::NotImplementedError();
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row,
+                                                                                         const Vector& vector ) const
+{
+   return TridiagonalDeviceDependentCode< Device >::
+             rowVectorProduct( this->rows,
+                               this->values,
+                               row,
+                               vector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename InVector,
+             typename OutVector >
+void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector,
+                                                                 OutVector& outVector ) const
+{
+   TNL_ASSERT( this->getColumns() == inVector.getSize(),
+            std::cerr << "Matrix columns: " << this->getColumns() << std::endl
+                 << "Vector size: " << inVector.getSize() << std::endl );
+   TNL_ASSERT( this->getRows() == outVector.getSize(),
+               std::cerr << "Matrix rows: " << this->getRows() << std::endl
+                    << "Vector size: " << outVector.getSize() << std::endl );
+
+   DeviceDependentCode::vectorProduct( *this, inVector, outVector );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                                                    const RealType& matrixMultiplicator,
+                                                    const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+            std::cerr << "This matrix columns: " << this->getColumns() << std::endl
+                 << "This matrix rows: " << this->getRows() << std::endl );
+
+   if( thisMatrixMultiplicator == 1.0 )
+      this->values += matrixMultiplicator * matrix.values;
+   else
+      this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values;
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+          typename Real2,
+          typename Index,
+          typename Index2 >
+__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+                                                             Tridiagonal< Real, Devices::Cuda, Index >* outMatrix,
+                                                             const Real matrixMultiplicator,
+                                                             const Index gridIdx )
+{
+   const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
+   if( rowIdx < inMatrix->getRows() )
+   {
+      if( rowIdx > 0 )
+        outMatrix->setElementFast( rowIdx-1,
+                                   rowIdx,
+                                   matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) );
+      outMatrix->setElementFast( rowIdx,
+                                 rowIdx,
+                                 matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) );
+      if( rowIdx < inMatrix->getRows()-1 )
+         outMatrix->setElementFast( rowIdx+1,
+                                    rowIdx,
+                                    matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) );
+   }
+}
+#endif
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Index2 >
+void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix,
+                                                                    const RealType& matrixMultiplicator )
+{
+   TNL_ASSERT( this->getRows() == matrix.getRows(),
+               std::cerr << "This matrix rows: " << this->getRows() << std::endl
+                    << "That matrix rows: " << matrix.getRows() << std::endl );
+   if( std::is_same< Device, Devices::Host >::value )
+   {
+      const IndexType& rows = matrix.getRows();
+      for( IndexType i = 1; i < rows; i++ )
+      {
+         RealType aux = matrix. getElement( i, i - 1 );
+         this->setElement( i, i - 1, matrix.getElement( i - 1, i ) );
+         this->setElement( i, i, matrix.getElement( i, i ) );
+         this->setElement( i - 1, i, aux );
+      }
+   }
+   if( std::is_same< Device, Devices::Cuda >::value )
+   {
+#ifdef HAVE_CUDA
+      Tridiagonal* kernel_this = Cuda::passToDevice( *this );
+      typedef  Tridiagonal< Real2, Device, Index2 > InMatrixType;
+      InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix );
+      dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
+      const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
+      const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
+      for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
+      {
+         if( gridIdx == cudaGrids - 1 )
+            cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
+         TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>>
+                                                    ( kernel_inMatrix,
+                                                      kernel_this,
+                                                      matrixMultiplicator,
+                                                      gridIdx );
+      }
+      Cuda::freeFromDevice( kernel_this );
+      Cuda::freeFromDevice( kernel_inMatrix );
+      TNL_CHECK_CUDA_DEVICE;
+#endif
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Vector1, typename Vector2 >
+__cuda_callable__
+void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b,
+                                                              const IndexType row,
+                                                              Vector2& x,
+                                                              const RealType& omega ) const
+{
+   RealType sum( 0.0 );
+   if( row > 0 )
+      sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ];
+   if( row < this->getColumns() - 1 )
+      sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ];
+   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum );
+}
+
+
+// copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix )
+{
+   this->setLike( matrix );
+   this->values = matrix.values;
+   return *this;
+}
+
+// cross-device copy assignment
+template< typename Real,
+          typename Device,
+          typename Index >
+   template< typename Real2, typename Device2, typename Index2, typename >
+Tridiagonal< Real, Device, Index >&
+Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix )
+{
+   static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value,
+                  "unknown device" );
+   static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value,
+                  "unknown device" );
+
+   this->setLike( matrix );
+
+   throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet.");
+}
+
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::save( File& file ) const
+{
+   Matrix< Real, Device, Index >::save( file );
+   file << this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::load( File& file )
+{
+   Matrix< Real, Device, Index >::load( file );
+   file >> this->values;
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const
+{
+   Object::save( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::load( const String& fileName )
+{
+   Object::load( fileName );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const
+{
+   for( IndexType row = 0; row < this->getRows(); row++ )
+   {
+      str <<"Row: " << row << " -> ";
+      for( IndexType column = row - 1; column < row + 2; column++ )
+         if( column >= 0 && column < this->columns )
+            str << " Col:" << column << "->" << this->getElement( row, column ) << "\t";
+      str << std::endl;
+   }
+}
+
+template< typename Real,
+          typename Device,
+          typename Index >
+__cuda_callable__
+Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row,
+                                                                    const IndexType column ) const
+{
+   TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows,
+              std::cerr << " this->rows = " << this->rows
+                   << " row = " << row << " column = " << column );
+   TNL_ASSERT( abs( row - column ) < 2,
+              std::cerr << "row = " << row << " column = " << column << std::endl );
+   return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column );
+}
+
+template<>
+class TridiagonalDeviceDependentCode< Devices::Host >
+{
+   public:
+
+      typedef Devices::Host Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return 2*row + column;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType  >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ 1 ];
+         Index i = 3 * row;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i - 1 ] +
+                   vector[ row ] * values[ i ];
+         return vector[ row - 1 ] * values[ i - 1 ] +
+                vector[ row ] * values[ i ] +
+                vector[ row + 1 ] * values[ i + 1 ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+#ifdef HAVE_OPENMP
+#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
+#endif
+         for( Index row = 0; row < matrix.getRows(); row ++ )
+            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
+      }
+};
+
+template<>
+class TridiagonalDeviceDependentCode< Devices::Cuda >
+{
+   public:
+ 
+      typedef Devices::Cuda Device;
+
+      template< typename Index >
+      __cuda_callable__
+      static Index getElementIndex( const Index rows,
+                                    const Index row,
+                                    const Index column )
+      {
+         return ( column - row + 1 )*rows + row - 1;
+      }
+
+      template< typename Vector,
+                typename Index,
+                typename ValuesType >
+      __cuda_callable__
+      static typename Vector::RealType rowVectorProduct( const Index rows,
+                                                         const ValuesType& values,
+                                                         const Index row,
+                                                         const Vector& vector )
+      {
+         if( row == 0 )
+            return vector[ 0 ] * values[ 0 ] +
+                   vector[ 1 ] * values[ rows - 1 ];
+         Index i = row - 1;
+         if( row == rows - 1 )
+            return vector[ row - 1 ] * values[ i ] +
+                   vector[ row ] * values[ i + rows ];
+         return vector[ row - 1 ] * values[ i ] +
+                vector[ row ] * values[ i + rows ] +
+                vector[ row + 1 ] * values[ i + 2*rows ];
+      }
+
+      template< typename Real,
+                typename Index,
+                typename InVector,
+                typename OutVector >
+      static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix,
+                                 const InVector& inVector,
+                                 OutVector& outVector )
+      {
+         MatrixVectorProductCuda( matrix, inVector, outVector );
+      }
+};
+
+} // namespace Matrices
+} // namespace TNL
-- 
GitLab