Commit 74c5c158 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Tomáš Oberhuber
Browse files

Implemented dense matrix assignment operator.

parent 50b1a44a
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -34,7 +34,8 @@ class Dense : public Matrix< Real, Device, Index >
      using RealType = Real;
      using DeviceType = Device;
      using IndexType = Index;
      using BaseType = Matrix< Real, Device, Index >;
      using RealAllocatorType = RealAllocator;
      using BaseType = Matrix< Real, Device, Index, RealAllocator >;
      using ValuesType = typename BaseType::ValuesVector;
      using ValuesViewType = typename ValuesType::ViewType;
      using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >;
+91 −7
Original line number Diff line number Diff line
@@ -377,7 +377,6 @@ forRows( IndexType first, IndexType last, Function& function ) const
      return true;
   };
   this->segments.forSegments( first, last, f );

}

template< typename Real,
@@ -392,11 +391,10 @@ forRows( IndexType first, IndexType last, Function& function )
{
   auto values_view = this->values.getView();
   auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx ) mutable -> bool {
      function( rowIdx, columnIdx, values_view[ globalIdx ] );
      function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] );
      return true;
   };
   this->segments.forSegments( first, last, f );

}

template< typename Real,
@@ -959,6 +957,50 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORItera
   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
}

/*template< typename Real,
          typename Device,
          typename Index,
          bool RowMajorOrder,
          typename RealAllocator >
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix )
{
   const IndexType bufferRowsCount( 128 );
   const IndexType columns = this->getColumns();
   const size_t bufferSize = bufferRowsCount * columns;
   Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
   Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
   auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
   auto destinationValuesBuffer_view = destinationValuesBuffer.getView();

   IndexType baseRow( 0 );
   const IndexType rowsCount = this->getRows();
   while( baseRow < rowsCount )
   {
      const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );

      ////
      // Copy matrix elements into buffer
      auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
         sourceValuesBuffer_view[ bufferIdx ] = value;
      };
      matrix.forRows( baseRow, lastRow, f1 );
      destinationValuesBuffer = sourceValuesBuffer;

      ////
      // Copy buffer to this matrix
      auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
         value = destinationValuesBuffer_view[ bufferIdx ];
      };
      this->forRows( baseRow, lastRow, f2 );
      baseRow += bufferRowsCount;
   }
   return *this;
}*/

////
// Assignment from a Dense matrix whose real type, device, index type, element
// ordering and allocator may all differ from those of this matrix.
// Three copy paths are visible below:
//  - same element ordering: the values vector is assigned as a whole,
//  - different ordering, same device: elements are written one by one through a view,
//  - different ordering, different device: rows are staged through two buffers
//    in chunks of bufferRowsCount rows.
// Returns a reference to this matrix.
// NOTE(review): part of the template parameter list is hidden by the hunk
// marker below, and the first lines of the body look duplicated (old and new
// diff lines shown together) - confirm against the real file.
template< typename Real,
          typename Device,
          typename Index,
@@ -968,16 +1010,58 @@ template< typename Real,
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
{
   if( RowMajorOrder == RowMajorOrder_ )
{
   // Presumably adopts the dimensions of `matrix`; setLike is defined elsewhere - TODO confirm.
   this->setLike( matrix );
   // Same element ordering on both sides: assign the whole values vector at once.
   if( RowMajorOrder == RowMajorOrder_ )
      this->values = matrix.getValues();
   else
   {
      // NOTE(review): this is an ordinary `if` on a compile-time constant, so both
      // branches are compiled for every combination of devices.
      if( std::is_same< DeviceType, Device_ >::value )
      {
         // Same device, different ordering: every element that forAllRows passes to
         // the lambda is stored at ( rowIdx, columnIdx ) of this matrix through its
         // view. globalIdx is not used here.
         auto this_view = this->getView();
         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
            this_view.getRow( rowIdx ).setElement( columnIdx, value );
         };
         matrix.forAllRows( f );
      }
      else
      {
         // Different device and different ordering: copy in chunks of at most
         // bufferRowsCount rows. Each buffer holds bufferRowsCount full rows.
         const IndexType bufferRowsCount( 128 );
         const IndexType columns = this->getColumns();
         const size_t bufferSize = bufferRowsCount * columns;
         // The source buffer is declared for Device_, the destination buffer for DeviceType.
         Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
         // The views are taken once, before the loop. This assumes the buffer
         // assignment inside the loop never reallocates - TODO confirm.
         auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
         auto destinationValuesBuffer_view = destinationValuesBuffer.getView();

         IndexType baseRow( 0 );
         const IndexType rowsCount = this->getRows();
         while( baseRow < rowsCount )
         {
            // Clamped to rowsCount so the last chunk may be shorter than bufferRowsCount.
            const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );

            ////
            // Copy matrix elements into buffer
            // Buffer position: rows are counted from baseRow and stored one after
            // another, `columns` entries each.
            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value ) mutable {
               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
               sourceValuesBuffer_view[ bufferIdx ] = value;
            };
            matrix.forRows( baseRow, lastRow, f1 );

            // Vector-to-vector assignment between the two buffers; presumably this is
            // where the data moves between devices - TODO confirm.
            destinationValuesBuffer = sourceValuesBuffer;

            ////
            // Copy buffer to this matrix
            // Uses the same buffer indexing as f1, so each value lands at the
            // ( rowIdx, columnIdx ) it was read from.
            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
               value = destinationValuesBuffer_view[ bufferIdx ];
            };
            this->forRows( baseRow, lastRow, f2 );
            baseRow += bufferRowsCount;
         }
      }
   }
   return *this;
}

template< typename Real,
+50 −1
Original line number Diff line number Diff line
@@ -1169,9 +1169,51 @@ void test_PerformSORIteration()
/**
 * Checks the assignment operator of the tested matrix type.
 *
 * A lower-triangular pattern ( element( i, j ) = i + j for j <= i, zero
 * elsewhere ) is written into a host Dense matrix and, when HAVE_CUDA is
 * defined, into a CUDA Dense matrix. Each source is assigned to a zeroed
 * instance of Matrix and every element of the result is compared with the
 * pattern.
 *
 * \tparam Matrix is the matrix type under test; it must provide the nested
 *         types RealType and IndexType.
 */
template< typename Matrix >
void test_AssignmentOperator()
{
   using RealType = typename Matrix::RealType;
   using IndexType = typename Matrix::IndexType;

   using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >;

   const IndexType rows( 10 ), columns( 10 );

   // Compares every element of m with the lower-triangular pattern.
   // i always walks rows and j always walks columns.
   auto expectLowerTriangle = [&] ( Matrix& m ) {
      for( IndexType i = 0; i < rows; i++ )
         for( IndexType j = 0; j < columns; j++ )
         {
            if( j > i )
               EXPECT_EQ( m.getElement( i, j ), 0.0 );
            else
               EXPECT_EQ( m.getElement( i, j ), i + j );
         }
   };

   DenseHost hostMatrix( rows, columns );
   // Zero explicitly so the test does not depend on the constructor zero-filling the values.
   hostMatrix.getValues() = 0.0;
   for( IndexType i = 0; i < rows; i++ )
      for( IndexType j = 0; j <= i && j < columns; j++ )
         hostMatrix( i, j ) = i + j;

   Matrix matrix( rows, columns );
   // Clear the target first so stale values cannot make the comparison pass.
   matrix.getValues() = 0.0;
   matrix = hostMatrix;
   expectLowerTriangle( matrix );

#ifdef HAVE_CUDA
   using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >;

   DenseCuda cudaMatrix( rows, columns );
   cudaMatrix.getValues() = 0.0;
   for( IndexType i = 0; i < rows; i++ )
      for( IndexType j = 0; j <= i && j < columns; j++ )
         cudaMatrix.setElement( i, j, i + j );

   matrix.getValues() = 0.0;
   matrix = cudaMatrix;
   expectLowerTriangle( matrix );
#endif
}


template< typename Matrix >
void test_SaveAndLoad()
{
@@ -1426,6 +1468,13 @@ TYPED_TEST( MatrixTest, addMatrixTest )
    test_AddMatrix< MatrixType >();
}

// Typed test: runs test_AssignmentOperator with the fixture's MatrixType.
TYPED_TEST( MatrixTest, assignmentOperatorTest )
{
    test_AssignmentOperator< typename TestFixture::MatrixType >();
}

TYPED_TEST( MatrixTest, saveAndLoadTest )
{
    using MatrixType = typename TestFixture::MatrixType;