diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h
index 757fa4eae4fcc34d5fa0e8752dd8f658aaf66daa..9c05297d152f6809a140edbbe302dcf3e394f0e2 100644
--- a/src/TNL/Matrices/Dense.h
+++ b/src/TNL/Matrices/Dense.h
@@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index >
       using RealType = Real;
       using DeviceType = Device;
       using IndexType = Index;
-      using BaseType = Matrix< Real, Device, Index >;
+      using RealAllocatorType = RealAllocator;
+      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
       using ValuesType = typename BaseType::ValuesVector;
       using ValuesViewType = typename ValuesType::ViewType;
       using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp
index 7517c6b0eaadd31a1242f88ac2371ed1b304c2a2..7a6c4becc383e4398813b1c8d4b1645ad973ad7a 100644
--- a/src/TNL/Matrices/Dense.hpp
+++ b/src/TNL/Matrices/Dense.hpp
@@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
@@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function )
 {
    auto values_view = this->values.getView();
    auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
-      function( rowIdx, columnIdx, values_view[ globalIdx ] );
+      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
       return true;
    };
    this->segments.forSegments( first, last, f );
-
 }
 
 template< typename Real,
@@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
    x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
 }
 
+/*template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder,
+          typename RealAllocator >
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
+Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
+operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
+{
+   const IndexType bufferRowsCount( 128 );
+   const IndexType columns = this->getColumns();
+   const size_t bufferSize = bufferRowsCount * columns;
+   Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
+   Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
+   auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
+   auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
+
+   IndexType baseRow( 0 );
+   const IndexType rowsCount = this->getRows();
+   while( baseRow < rowsCount )
+   {
+      const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+      ////
+      // Copy matrix elements into buffer
+      auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
+         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+         sourceValuesBuffer_view[ bufferIdx ] = value;
+      };
+      matrix.forRows( baseRow, lastRow, f1 );
+      destinationValuesBuffer = sourceValuesBuffer;
+
+      ////
+      // Copy buffer to this matrix
+      auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
+         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+         value = destinationValuesBuffer_view[ bufferIdx ];
+      };
+      this->forRows( baseRow, lastRow, f2 );
+      baseRow += bufferRowsCount;
+   }
+   return *this;
+}*/
+
 template< typename Real,
           typename Device,
           typename Index,
@@ -969,15 +1011,57 @@ Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
 Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
 operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
 {
+   this->setLike( matrix );
    if( RowMajorOrder == RowMajorOrder_ )
-   {
-      this->setLike( matrix );
       this->values = matrix.getValues();
-   }
    else
    {
-      
+      if( std::is_same< DeviceType, Device_ >::value )
+      {
+         auto this_view = this->getView();
+         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+            this_view.getRow( rowIdx ).setElement( columnIdx, value );
+         };
+         matrix.forAllRows( f );
+      }
+      else
+      {
+         const IndexType bufferRowsCount( 128 );
+         const IndexType columns = this->getColumns();
+         const size_t bufferSize = bufferRowsCount * columns;
+         Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
+         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
+         auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
+         auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
+
+         IndexType baseRow( 0 );
+         const IndexType rowsCount = this->getRows();
+         while( baseRow < rowsCount )
+         {
+            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
+
+            ////
+            // Copy matrix elements into buffer
+            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+               sourceValuesBuffer_view[ bufferIdx ] = value;
+            };
+            matrix.forRows( baseRow, lastRow, f1 );
+
+            destinationValuesBuffer = sourceValuesBuffer;
+
+            ////
+            // Copy buffer to this matrix
+            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
+               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
+               value = destinationValuesBuffer_view[ bufferIdx ];
+            };
+            this->forRows( baseRow, lastRow, f2 );
+            baseRow += bufferRowsCount;
+         }
+      }
    }
+   return *this;
 }
 
 template< typename Real,
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index 2ddd19c7af3fa4eab6941a3ef25b1dd777acf67b..686602ebd10a33d7f73185cabbd274fe09ec1669 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -1169,9 +1169,51 @@ void test_PerformSORIteration()
 template< typename Matrix >
 void test_AssignmentOperator()
 {
-   EXPECT_EQ( 1, 0 );
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;
+   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;
+
+   const IndexType rows( 10 ), columns( 10 );
+   DenseHost hostMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         hostMatrix( i, j ) = i + j;
+
+   Matrix matrix( rows, columns );
+   matrix.getValues() = 0.0;
+   matrix = hostMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+
+#ifdef HAVE_CUDA
+   DenseCuda cudaMatrix( rows, columns );
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j <= i; j++ )
+         cudaMatrix.setElement( i, j, i + j );
+
+   matrix.getValues() = 0.0;
+   matrix = cudaMatrix;
+   for( IndexType i = 0; i < columns; i++ )
+      for( IndexType j = 0; j < rows; j++ )
+      {
+         if( j > i )
+            EXPECT_EQ( matrix.getElement( i, j ), 0.0 );
+         else
+            EXPECT_EQ( matrix.getElement( i, j ), i + j );
+      }
+#endif
 }
 
+
 template< typename Matrix >
 void test_SaveAndLoad()
 {
@@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest )
     test_AddMatrix< MatrixType >();
 }
 
+TYPED_TEST( MatrixTest, assignmentOperatorTest )
+{
+    using MatrixType = typename TestFixture::MatrixType;
+
+    test_AssignmentOperator< MatrixType >();
+}
+
 TYPED_TEST( MatrixTest, saveAndLoadTest )
 {
     using MatrixType = typename TestFixture::MatrixType;