Commit c2514bc6 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Debugging assignment of tridiagonal matrix to sparse matrix.

parent 5927f064
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
                    const RealAllocatorType& realAllocator = RealAllocatorType(),
                    const IndexAllocatorType& indexAllocator = IndexAllocatorType() );

      ViewType getView();
      ViewType getView() const; // TODO: remove const

      ConstViewType getConstView() const;

+41 −18
Original line number Diff line number Diff line
@@ -82,13 +82,13 @@ template< typename Real,
          typename IndexAllocator >
auto
SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
getView() -> ViewType
getView() const -> ViewType
{
   return ViewType( this->getRows(),
                    this->getColumns(),
                    this->getValues().getView(),
                    this->columnIndexes.getView(),
                    this->segments.getView() );
                    const_cast< SparseMatrix* >( this )->getValues().getView(),  // TODO: remove const_cast
                    const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
                    const_cast< SparseMatrix* >( this )->segments.getView() );
}

template< typename Real,
@@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
         thisValuesBuffer_view = matrixValuesBuffer_view;

         ////
         // Copy matrix elements from the buffer to the matrix
         // Copy matrix elements from the buffer to the matrix, ignoring
         // zero matrix elements.
         const IndexType matrix_columns = this->getColumns();
         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute  ) mutable {
            RealType inValue( 0.0 );
@@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix )
   using RHSRealType = typename RHSMatrix::RealType;
   using RHSDeviceType = typename RHSMatrix::DeviceType;
   using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
   using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
   using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;

   typename RHSMatrix::RowsCapacitiesType rowLengths;
   Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
   matrix.getCompressedRowLengths( rowLengths );
   this->setDimensions( matrix.getRows(), matrix.getColumns() );
   this->setCompressedRowLengths( rowLengths );
   Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
   rowLocalIndexes = 0;


   // TODO: use getConstView when it works
   const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
   const IndexType paddingIndex = this->getPaddingIndex();
   auto columns_view = this->columnIndexes.getView();
   auto values_view = this->values.getView();
   auto rowLocalIndexes_view = rowLocalIndexes.getView();
   columns_view = paddingIndex;

   /*if( std::is_same< DeviceType, RHSDeviceType >::value )
   if( std::is_same< DeviceType, RHSDeviceType >::value )
   {
      const auto segments_view = this->segments.getView();
      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
      auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
         RealType inValue( 0.0 );
         IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
         if( columnIndex != paddingIndex )
         {
            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
            IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
            columns_view[ thisGlobalIdx ] = columnIndex;
            values_view[ thisGlobalIdx ] = value;
            rowLocalIndexes_view[ rowIdx ] = localIdx;
         }
      };
      matrix.forAllRows( f );
   }
   else*/
   else
   {
      const IndexType maxRowLength = max( rowLengths );
      const IndexType bufferRowsCount( 128 );
@@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix )
         thisColumnsBuffer_view = matrixColumnsBuffer_view;

         ////
         // Copy matrix elements from the buffer to the matrix
         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
            const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
            const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
            if( column != paddingIndex )
         // Copy matrix elements from the buffer to the matrix, ignoring
         // zero matrix elements.
         const IndexType matrix_columns = this->getColumns();
         auto matrix_view = matrix.getView();
         auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
            RealType inValue( 0.0 );
            IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
            auto matrixRow = matrix_view.getRow( rowIdx );
            while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
            {
               bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
               inValue = thisValuesBuffer_view[ bufferIdx ];
            }
            rowLocalIndexes_view[ rowIdx ] = localIdx;
            if( inValue == 0.0 )
            {
               columnIndex = column;
               value = thisValuesBuffer_view[ bufferIdx ];
               columnIndex = paddingIndex;
               value = 0.0;
            }
            else
            {
               columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
               value = inValue;
            }
         };
         this->forRows( baseRow, lastRow, f2 );
+1 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ auto
TridiagonalMatrixRowView< ValuesView, Indexer >::
getSize() const -> IndexType
{
   return indexer.getRowSize();
   return indexer.getRowSize( rowIdx );
}

template< typename ValuesView, typename Indexer >
+2 −0
Original line number Diff line number Diff line
@@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
      template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
      bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;

      __cuda_callable__
      RowView getRow( const IndexType& rowIdx );

      __cuda_callable__
      const RowView getRow( const IndexType& rowIdx ) const;

      void setValue( const RealType& v );
+9 −8
Original line number Diff line number Diff line
@@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
{
   const auto values_view = this->values.getConstView();
   const auto indexer = this->indexer;
   bool compute( true );
   auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
      if( rowIdx == 0 )
      {
         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
         function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
         function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
      } 
      else if( rowIdx + 1 < indexer.getColumns() )
      {
         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
      }
      else if( rowIdx < indexer.getColumns() )
      {
         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
         function( rowIdx, 1, rowIdx,     values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
      }
      else
         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
   };
   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
}
Loading