Commit bb234945 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Tomáš Oberhuber
Browse files

Implementing dense matrix assignment.

parent 7cc6eab9
Loading
Loading
Loading
Loading
+23 −4
Original line number Diff line number Diff line
@@ -167,12 +167,31 @@ class Dense : public Matrix< Real, Device, Index >
                                Vector2& x,
                                const RealType& omega = 1.0 ) const;

      // copy assignment
      /**
       * \brief Assignment operator for exactly the same type of the dense matrix.
       * 
       * @param matrix
       * @return 
       */
      Dense& operator=( const Dense& matrix );

      // cross-device copy assignment
      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAlocator_ >
      Dense& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAlocator_ >& matrix );
      /**
       * \brief Assignment operator for other dense matrices.
       * 
       * @param matrix
       * @return 
       */
      template< typename RHSReal, typename RHSDevice, typename RHSIndex,
                 bool RHSRowMajorOrder, typename RHSRealAllocator >
      Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix );

      /**
       * \brief Assignment operator for other (sparse) types of matrices.
       * @param matrix
       * @return 
       */
      template< typename RHSMatrix >
      Dense& operator=( const RHSMatrix& matrix );

      template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ >
      bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const;
+144 −76
Original line number Diff line number Diff line
@@ -118,7 +118,7 @@ void
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
setLike( const Matrix_& matrix )
{
   Matrix< Real, Device, Index, RealAllocator >::setLike( matrix );
   this->setDimensions( matrix.getRows(), matrix.getColumns() );
}

template< typename Real,
@@ -896,13 +896,50 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat
{
   setLike( matrix );
   this->values = matrix.values;
   /*const IndexType bufferRowsCount( 128 );
   const IndexType columns = this->getColumns();
   const size_t bufferSize = bufferRowsCount * columns;
   Containers::Vector< RealType, Device, IndexType, RealAllocatorType > sourceValuesBuffer( bufferSize );
   Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
   auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
   auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
   return *this;
}

template< typename Real,
          typename Device,
          typename Index,
          bool RowMajorOrder,
          typename RealAllocator >
   template< typename RHSReal, typename RHSDevice, typename RHSIndex,
             bool RHSRowMajorOrder, typename RHSRealAllocator >
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix )
{
   using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >;
   using RHSIndexType = typename RHSMatrix::IndexType;
   using RHSRealType = typename RHSMatrix::RealType;
   using RHSDeviceType = typename RHSMatrix::DeviceType;

   this->setLike( matrix );
   if( RowMajorOrder == RHSRowMajorOrder )
   {
      this->values = matrix.values;
      return *this;
   }

   auto this_view = this->view;
   if( std::is_same< DeviceType, RHSDeviceType >::value )
   {
      const auto segments_view = this->segments.getView();
      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
         this_view( rowIdx, columnIdx ) = value;
      };
      matrix.forAllRows( f );
   }
   else
   {
      const IndexType maxRowLength = matrix.getColumns();
      const IndexType bufferRowsCount( 128 );
      const size_t bufferSize = bufferRowsCount * maxRowLength;
      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize );
      Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize );
      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
      auto thisValuesBuffer_view = thisValuesBuffer.getView();

      IndexType baseRow( 0 );
      const IndexType rowsCount = this->getRows();
@@ -912,23 +949,28 @@ operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& mat

         ////
         // Copy matrix elements into buffer
      auto f1 = [=] __cuda_callable__ ( Index rowIdx, Index columnIdx, Index globalIdx, const Real& value ) mutable {
         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
         sourceValuesBuffer_view[ bufferIdx ] = value;
         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx;
            matrixValuesBuffer_view[ bufferIdx ] = value;
         };
         matrix.forRows( baseRow, lastRow, f1 );
      destinationValuesBuffer = sourceValuesBuffer;

         ////
      // Copy buffer to this matrix
      auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value ) mutable {
         const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
         value = destinationValuesBuffer_view[ bufferIdx ];
         // Copy the source matrix buffer to this matrix buffer
         thisValuesBuffer_view = matrixValuesBuffer_view;

         ////
         // Copy matrix elements from the buffer to the matrix.
         auto this_view = this->view;
         auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable {
            IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx;
            this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
         };
      this->forRows( baseRow, lastRow, f2 );
         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
         baseRow += bufferRowsCount;
      }
   return *this;*/
   }
   return *this;
}

template< typename Real,
@@ -936,61 +978,87 @@ template< typename Real,
          typename Index,
          bool RowMajorOrder,
          typename RealAllocator >
   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
   template< typename RHSMatrix >
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >&
Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::
operator=( const Dense< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix )
{
   this->setLike( matrix );
   if( RowMajorOrder == RowMajorOrder_ )
      this->values = matrix.getValues();
   else
operator=( const RHSMatrix& matrix )
{
      if( std::is_same< DeviceType, Device_ >::value )
   using RHSIndexType = typename RHSMatrix::IndexType;
   using RHSRealType = typename RHSMatrix::RealType;
   using RHSDeviceType = typename RHSMatrix::DeviceType;
   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;

   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
   matrix.getCompressedRowLengths( rowLengths );
   this->setDimensions( matrix.getRows(), matrix.getColumns() );

   // TODO: use getConstView when it works
   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
   auto values_view = this->values.getView();
   RHSIndexType padding_index = matrix.getPaddingIndex();
   this->values = 0.0;

   if( std::is_same< DeviceType, RHSDeviceType >::value )
   {
         auto this_view = this->getView();
         auto f = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
            this_view.getRow( rowIdx ).setElement( columnIdx, value );
      const auto segments_view = this->segments.getView();
      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable {
         if( value != 0.0 && columnIdx != padding_index )
            values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value;
      };
      matrix.forAllRows( f );
   }
   else
   {
      const IndexType maxRowLength = max( rowLengths );
      const IndexType bufferRowsCount( 128 );
         const IndexType columns = this->getColumns();
         const size_t bufferSize = bufferRowsCount * columns;
         Containers::Vector< RealType, Device_, IndexType, RealAllocator_ > sourceValuesBuffer( bufferSize );
         Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > destinationValuesBuffer( bufferSize );
         auto sourceValuesBuffer_view = sourceValuesBuffer.getView();
         auto destinationValuesBuffer_view = destinationValuesBuffer.getView();
      const size_t bufferSize = bufferRowsCount * maxRowLength;
      Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize );
      Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize );
      Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize );
      Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize );
      auto matrixValuesBuffer_view = matrixValuesBuffer.getView();
      auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView();
      auto thisValuesBuffer_view = thisValuesBuffer.getView();
      auto thisColumnsBuffer_view = thisColumnsBuffer.getView();

      IndexType baseRow( 0 );
      const IndexType rowsCount = this->getRows();
      while( baseRow < rowsCount )
      {
         const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount );
         thisColumnsBuffer = padding_index;
         matrixColumnsBuffer_view = padding_index;

         ////
         // Copy matrix elements into buffer
            auto f1 = [=] __cuda_callable__ ( Index_ rowIdx, Index_ columnIdx, Index_ globalIdx, const Real_& value, bool& compute ) mutable {
               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
               sourceValuesBuffer_view[ bufferIdx ] = value;
         auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
            if( columnIndex != padding_index )
            {
               const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
               matrixColumnsBuffer_view[ bufferIdx ] = columnIndex;
               matrixValuesBuffer_view[ bufferIdx ] = value;
            }
         };
         matrix.forRows( baseRow, lastRow, f1 );

            destinationValuesBuffer = sourceValuesBuffer;
         ////
         // Copy the source matrix buffer to this matrix buffer
         thisValuesBuffer_view = matrixValuesBuffer_view;
         thisColumnsBuffer_view = matrixColumnsBuffer_view;

         ////
            // Copy buffer to this matrix
            auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, RealType& value, bool& compute ) mutable {
               const IndexType bufferIdx = ( rowIdx - baseRow ) * columns + columnIdx;
               value = destinationValuesBuffer_view[ bufferIdx ];
         // Copy matrix elements from the buffer to the matrix
         auto this_view = this->view;
         auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable {
            IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx;
            IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ];
            if( columnIdx != padding_index )
               this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ];
         };
            this->forRows( baseRow, lastRow, f2 );
         Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 );
         baseRow += bufferRowsCount;
      }
   }
   }
   this->view = this->getView();
   return *this;
}
+3 −0
Original line number Diff line number Diff line
@@ -201,6 +201,9 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator >

      IndexerType& getIndexer();

      __cuda_callable__
      IndexType getPaddingIndex() const;

   protected:

      __cuda_callable__
+14 −0
Original line number Diff line number Diff line
@@ -830,6 +830,20 @@ getElementIndex( const IndexType row, const IndexType column ) const
   return this->indexer.getGlobalIndex( row, localIdx );
}

template< typename Real,
          typename Device,
          typename Index,
          bool RowMajorOrder,
          typename RealAllocator,
          typename IndexAllocator >
__cuda_callable__
Index
Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
getPaddingIndex() const
{
   return this->view.getPaddingIndex();
}

/*
template<>
class MultidiagonalDeviceDependentCode< Devices::Host >
+3 −0
Original line number Diff line number Diff line
@@ -163,6 +163,9 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index >
      __cuda_callable__
      IndexerType& getIndexer();

      __cuda_callable__
      IndexType getPaddingIndex() const;

   protected:

      __cuda_callable__
Loading