diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index 7bc83f2d4904bf647d412f19d053776d83c7334b..94470d3d1c174221aa38fbcbe9fd7886e257c77c 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -668,7 +668,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo if( std::is_same< Device, Device_ >::value ) { const auto matrix_view = matrix.getView(); - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; }; this->forAllRows( f ); @@ -695,7 +695,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo //// // Copy matrix elements into buffer - auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value ) mutable { + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixValuesBuffer_view[ bufferIdx ] = value; }; @@ -707,7 +707,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo //// // Copy matrix elements from the buffer to the matrix - auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value ) mutable { + auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value, bool& compute ) mutable { const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; value = thisValuesBuffer_view[ bufferIdx ]; }; diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index 96312d03cfc6618c810b9ba6aad3cef511d8be54..22436846541b991fa8168ad241de078a659e6124 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -216,8 +216,10 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: setValue( const RealType& v ) { + // we dont do this->values = v here because it would set even elements 'outside' the matrix + // method getNumberOfNonzeroElements would not well const RealType newValue = v; - auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value ) mutable { + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable { value = newValue; }; this->forAllRows( f ); @@ -419,12 +421,13 @@ forRows( IndexType first, IndexType last, Function& function ) const IndexType diagonalsCount = this->diagonalsShifts.getSize(); const IndexType columns = this->getColumns(); const auto indexer = this->indexer; + bool compute( true ); auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { - for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) + for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ ) { const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; if( columnIdx >= 0 && columnIdx < columns ) - function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ); + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); } }; Algorithms::ParallelFor< DeviceType >::exec( first, last, f );