Debugging assignment of tridiagonal matrix to sparse matrix. (c2514bc6) · Commits · TNL / tnl-dev

src/TNL/Matrices/SparseMatrix.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -73,7 +73,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator >
		const RealAllocatorType& realAllocator = RealAllocatorType(),
		const IndexAllocatorType& indexAllocator = IndexAllocatorType() );

		ViewType getView();
		ViewType getView() const; // TODO: remove const

		ConstViewType getConstView() const;

src/TNL/Matrices/SparseMatrix.hpp

+41 −18

Original line number	Diff line number	Diff line
		@@ -82,13 +82,13 @@ template< typename Real,
		typename IndexAllocator >
		auto
		SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::
		getView() -> ViewType
		getView() const -> ViewType
		{
		return ViewType( this->getRows(),
		this->getColumns(),
		this->getValues().getView(),
		this->columnIndexes.getView(),
		this->segments.getView() );
		const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast
		const_cast< SparseMatrix* >( this )->columnIndexes.getView(),
		const_cast< SparseMatrix* >( this )->segments.getView() );
		}

		template< typename Real,
		@@ -624,7 +624,8 @@ operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >&
		thisValuesBuffer_view = matrixValuesBuffer_view;

		////
		// Copy matrix elements from the buffer to the matrix
		// Copy matrix elements from the buffer to the matrix, ignoring
		// zero matrix elements.
		const IndexType matrix_columns = this->getColumns();
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
		RealType inValue( 0.0 );
		@@ -672,34 +673,41 @@ operator=( const RHSMatrix& matrix )
		using RHSRealType = typename RHSMatrix::RealType;
		using RHSDeviceType = typename RHSMatrix::DeviceType;
		using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType;
		using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType;
		using RHSIndexAllocatorType = typename Allocators::Default< RHSDeviceType >::template Allocator< RHSIndexType >;

		typename RHSMatrix::RowsCapacitiesType rowLengths;
		Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths;
		matrix.getCompressedRowLengths( rowLengths );
		this->setDimensions( matrix.getRows(), matrix.getColumns() );
		this->setCompressedRowLengths( rowLengths );
		Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() );
		rowLocalIndexes = 0;


		// TODO: use getConstView when it works
		const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView();
		const IndexType paddingIndex = this->getPaddingIndex();
		auto columns_view = this->columnIndexes.getView();
		auto values_view = this->values.getView();
		auto rowLocalIndexes_view = rowLocalIndexes.getView();
		columns_view = paddingIndex;

		/*if( std::is_same< DeviceType, RHSDeviceType >::value )
		if( std::is_same< DeviceType, RHSDeviceType >::value )
		{
		const auto segments_view = this->segments.getView();
		auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
		auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable {
		RealType inValue( 0.0 );
		IndexType localIdx( rowLocalIndexes_view[ rowIdx ] );
		if( columnIndex != paddingIndex )
		{
		IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx );
		IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ );
		columns_view[ thisGlobalIdx ] = columnIndex;
		values_view[ thisGlobalIdx ] = value;
		rowLocalIndexes_view[ rowIdx ] = localIdx;
		}
		};
		matrix.forAllRows( f );
		}
		else*/
		else
		{
		const IndexType maxRowLength = max( rowLengths );
		const IndexType bufferRowsCount( 128 );
		@@ -739,14 +747,29 @@ operator=( const RHSMatrix& matrix )
		thisColumnsBuffer_view = matrixColumnsBuffer_view;

		////
		// Copy matrix elements from the buffer to the matrix
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
		const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx;
		const IndexType column = thisColumnsBuffer_view[ bufferIdx ];
		if( column != paddingIndex )
		// Copy matrix elements from the buffer to the matrix, ignoring
		// zero matrix elements.
		const IndexType matrix_columns = this->getColumns();
		auto matrix_view = matrix.getView();
		auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx_, IndexType& columnIndex, RealType& value, bool& compute ) mutable {
		RealType inValue( 0.0 );
		IndexType bufferIdx, localIdx( rowLocalIndexes_view[ rowIdx ] );
		auto matrixRow = matrix_view.getRow( rowIdx );
		while( inValue == 0.0 && localIdx < matrixRow.getSize() ) //matrix_columns )
		{
		bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx++;
		inValue = thisValuesBuffer_view[ bufferIdx ];
		}
		rowLocalIndexes_view[ rowIdx ] = localIdx;
		if( inValue == 0.0 )
		{
		columnIndex = column;
		value = thisValuesBuffer_view[ bufferIdx ];
		columnIndex = paddingIndex;
		value = 0.0;
		}
		else
		{
		columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1;
		value = inValue;
		}
		};
		this->forRows( baseRow, lastRow, f2 );

src/TNL/Matrices/TridiagonalMatrixRowView.hpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -29,7 +29,7 @@ auto
		TridiagonalMatrixRowView< ValuesView, Indexer >::
		getSize() const -> IndexType
		{
		return indexer.getRowSize();
		return indexer.getRowSize( rowIdx );
		}

		template< typename ValuesView, typename Indexer >

src/TNL/Matrices/TridiagonalMatrixView.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -75,8 +75,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index >
		template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
		bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const;

		__cuda_callable__
		RowView getRow( const IndexType& rowIdx );

		__cuda_callable__
		const RowView getRow( const IndexType& rowIdx ) const;

		void setValue( const RealType& v );

src/TNL/Matrices/TridiagonalMatrixView.hpp

+9 −8

Original line number	Diff line number	Diff line
		@@ -340,25 +340,26 @@ forRows( IndexType first, IndexType last, Function& function ) const
		{
		const auto values_view = this->values.getConstView();
		const auto indexer = this->indexer;
		bool compute( true );
		auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
		if( rowIdx == 0 )
		{
		function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] );
		function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] );
		function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute );
		function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute );
		}
		else if( rowIdx + 1 < indexer.getColumns() )
		{
		function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
		function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
		function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
		function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
		function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
		function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute );
		}
		else if( rowIdx < indexer.getColumns() )
		{
		function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
		function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
		function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
		function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute );
		}
		else
		function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
		function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute );
		};
		Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
		}