Loading src/TNL/Matrices/SparseMatrix.h +1 −1 Original line number Diff line number Diff line Loading @@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); ViewType getView(); ViewType getView() const; // TODO: remove const ConstViewType getConstView() const; Loading src/TNL/Matrices/SparseMatrix.hpp +41 −18 Original line number Diff line number Diff line Loading @@ -82,13 +82,13 @@ template< typename Real, typename IndexAllocator > auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getView() -> ViewType getView() const -> ViewType { return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), this->columnIndexes.getView(), this->segments.getView() ); const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast const_cast< SparseMatrix* >( this )->columnIndexes.getView(), const_cast< SparseMatrix* >( this )->segments.getView() ); } template< typename Real, Loading Loading @@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& thisValuesBuffer_view = matrixValuesBuffer_view; //// // Copy matrix elements from the buffer to the matrix // Copy matrix elements from the buffer to the matrix and ignoring // zero matrix elements. const IndexType matrix_columns = this->getColumns(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); Loading Loading @@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix ) using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >; typename RHSMatrix::RowsCapacitiesType rowLengths; Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); this->setCompressedRowLengths( rowLengths ); Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); rowLocalIndexes = 0; // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; /*if( std::is_same< DeviceType, RHSDeviceType >::value ) if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); if( columnIndex != paddingIndex ) { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); columns_view[ thisGlobalIdx ] = columnIndex; values_view[ thisGlobalIdx ] = value; rowLocalIndexes_view[ rowIdx ] = localIdx; } }; matrix.forAllRows( f ); } else*/ else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); Loading Loading @@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix ) thisColumnsBuffer_view = matrixColumnsBuffer_view; //// // Copy matrix elements from the buffer to the matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; if( column != paddingIndex ) // Copy matrix elements from the buffer to the matrix and ignoring // zero matrix elements const IndexType matrix_columns = this->getColumns(); auto matrix_view = matrix.getView(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] ); auto matrixRow = matrix_view.getRow( rowIdx ); while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns ) { bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++; inValue = thisValuesBuffer_view[ bufferIdx ]; } rowLocalIndexes_view[ rowIdx ] = localIdx; if( inValue == 0.0 ) { columnIndex = column; value = thisValuesBuffer_view[ bufferIdx ]; columnIndex = paddingIndex; value = 0.0; } else { columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; value = inValue; } }; this->forRows( baseRow, lastRow, f2 ); Loading src/TNL/Matrices/TridiagonalMatrixRowView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ auto TridiagonalMatrixRowView< ValuesView, Indexer >:: getSize() const -> IndexType { return indexer.getRowSize(); return indexer.getRowSize( rowIdx ); } template< typename ValuesView, typename Indexer > Loading src/TNL/Matrices/TridiagonalMatrixView.h +2 −0 Original line number Diff line number Diff line Loading @@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); Loading src/TNL/Matrices/TridiagonalMatrixView.hpp +9 −8 Original line number Diff line number Diff line Loading @@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); const auto indexer = this->indexer; bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { if( rowIdx == 0 ) { function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute ); function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); } else if( rowIdx + 1 < indexer.getColumns() ) { function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute ); } else if( rowIdx < indexer.getColumns() ) { function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); } else function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } Loading Loading
src/TNL/Matrices/SparseMatrix.h +1 −1 Original line number Diff line number Diff line Loading @@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); ViewType getView(); ViewType getView() const; // TODO: remove const ConstViewType getConstView() const; Loading
src/TNL/Matrices/SparseMatrix.hpp +41 −18 Original line number Diff line number Diff line Loading @@ -82,13 +82,13 @@ template< typename Real, typename IndexAllocator > auto SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: getView() -> ViewType getView() const -> ViewType { return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), this->columnIndexes.getView(), this->segments.getView() ); const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast const_cast< SparseMatrix* >( this )->columnIndexes.getView(), const_cast< SparseMatrix* >( this )->segments.getView() ); } template< typename Real, Loading Loading @@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& thisValuesBuffer_view = matrixValuesBuffer_view; //// // Copy matrix elements from the buffer to the matrix // Copy matrix elements from the buffer to the matrix and ignoring // zero matrix elements. const IndexType matrix_columns = this->getColumns(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); Loading Loading @@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix ) using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >; typename RHSMatrix::RowsCapacitiesType rowLengths; Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; matrix.getCompressedRowLengths( rowLengths ); this->setDimensions( matrix.getRows(), matrix.getColumns() ); this->setCompressedRowLengths( rowLengths ); Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); rowLocalIndexes = 0; // TODO: use getConstView when it works const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); const IndexType paddingIndex = this->getPaddingIndex(); auto columns_view = this->columnIndexes.getView(); auto values_view = this->values.getView(); auto rowLocalIndexes_view = rowLocalIndexes.getView(); columns_view = paddingIndex; /*if( std::is_same< DeviceType, RHSDeviceType >::value ) if( std::is_same< DeviceType, RHSDeviceType >::value ) { const auto segments_view = this->segments.getView(); auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); if( columnIndex != paddingIndex ) { IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx ); IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); columns_view[ thisGlobalIdx ] = columnIndex; values_view[ thisGlobalIdx ] = value; rowLocalIndexes_view[ rowIdx ] = localIdx; } }; matrix.forAllRows( f ); } else*/ else { const IndexType maxRowLength = max( rowLengths ); const IndexType bufferRowsCount( 128 ); Loading Loading @@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix ) thisColumnsBuffer_view = matrixColumnsBuffer_view; //// // Copy matrix elements from the buffer to the matrix auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; const IndexType column = thisColumnsBuffer_view[ bufferIdx ]; if( column != paddingIndex ) // Copy matrix elements from the buffer to the matrix and ignoring // zero matrix elements const IndexType matrix_columns = this->getColumns(); auto matrix_view = matrix.getView(); auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable { RealType inValue( 0.0 ); IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] ); auto matrixRow = matrix_view.getRow( rowIdx ); while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns ) { bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++; inValue = thisValuesBuffer_view[ bufferIdx ]; } rowLocalIndexes_view[ rowIdx ] = localIdx; if( inValue == 0.0 ) { columnIndex = column; value = thisValuesBuffer_view[ bufferIdx ]; columnIndex = paddingIndex; value = 0.0; } else { columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; value = inValue; } }; this->forRows( baseRow, lastRow, f2 ); Loading
src/TNL/Matrices/TridiagonalMatrixRowView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -29,7 +29,7 @@ auto TridiagonalMatrixRowView< ValuesView, Indexer >:: getSize() const -> IndexType { return indexer.getRowSize(); return indexer.getRowSize( rowIdx ); } template< typename ValuesView, typename Indexer > Loading
src/TNL/Matrices/TridiagonalMatrixView.h +2 −0 Original line number Diff line number Diff line Loading @@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; void setValue( const RealType& v ); Loading
src/TNL/Matrices/TridiagonalMatrixView.hpp +9 −8 Original line number Diff line number Diff line Loading @@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); const auto indexer = this->indexer; bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { if( rowIdx == 0 ) { function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute ); function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); } else if( rowIdx + 1 < indexer.getColumns() ) { function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ); function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute ); } else if( rowIdx < indexer.getColumns() ) { function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ); function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); } else function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); } Loading