Reformatted getNonZeroRowLength to pass on the device Type in string and thus... (991d31a0) · Commits · TNL / tnl-dev

src/TNL/Matrices/CSR_impl.h

+35 −8

Original line number	Diff line number	Diff line
		@@ -136,8 +136,28 @@ template< typename Real,
		typename Index >
		Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
		{
		ConstMatrixRow matrixRow = getRow( row );
		return matrixRow.getNonZeroElementsCount();
		ConstMatrixRow matrixRow = this->getRow( row );
		IndexType count = matrixRow.getNonZeroElementsCount( TNL::String( Device::getDeviceType() ) );
		// return count;
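		// NOTE(review): the `return count;` line above is commented out, which leaves this code path without a return statement -- confirm whether that is intended.
		// getRow() used to cause segmentation faults when called from here.
		// To make this work, getRow() had to be changed to read the row pointers via .getElement( rowIndex ) instead of [ rowIndex ].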


		// THE FOLLOWING fails to compile with: /home/lukas/tnl-dev/src/TNL/ParallelFor.h(92): error: identifier "" is undefined in device code
		// static IndexType elementCount ( 0 );
		// ConstMatrixRow matrixRow = this->getRow( row );
		//
		// elementCount = 0; // Reset on every call. Without this step it returned incorrect values, presumably because elementCount is static and keeps its value from previous calls.
		//
		// auto computeNonZeros = [matrixRow] __cuda_callable__ ( IndexType i ) mutable
		// {
		// if( matrixRow.getElementValue( i ) != 0.0 )
		// elementCount++;
		// };
		//
		// ParallelFor< DeviceType >::exec( (IndexType) 0, matrixRow.getLength(), computeNonZeros );
		//
		// return elementCount;
		}

		template< typename Real,
		@@ -439,12 +459,19 @@ typename CSR< Real, Device, Index >::ConstMatrixRow
		CSR< Real, Device, Index >::
		getRow( const IndexType rowIndex ) const
		{
		const IndexType rowOffset = this->rowPointers[ rowIndex ];
		const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
		const IndexType rowOffset = this->rowPointers.getElement( rowIndex );
		const IndexType rowLength = this->rowPointers.getElement( rowIndex + 1 ) - rowOffset;
		return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
		&this->values[ rowOffset ],
		rowLength,
		1 );

		// const IndexType rowOffset = this->rowPointers[ rowIndex ];
		// const IndexType rowLength = this->rowPointers[ rowIndex + 1 ] - rowOffset;
		// return ConstMatrixRow( &this->columnIndexes[ rowOffset ],
		// &this->values[ rowOffset ],
		// rowLength,
		// 1 );
		}

		template< typename Real,

src/TNL/Matrices/ChunkedEllpack_impl.h

+18 −3

Original line number	Diff line number	Diff line
		@@ -198,8 +198,8 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsV
		// will be zero and make the assertion fail ( https://stackoverflow.com/questions/33273359/in-c-using-the-ceil-a-division-is-not-working ).
		// To fix this, cast them to a floating-point type ( double ) instead of ( RealType ).
		maxChunkInSlice = max( maxChunkInSlice,
		ceil( ( float ) rowLengths[ i ] /
		( float ) this->rowToChunkMapping[ i ] ) );
		ceil( ( double ) rowLengths[ i ] /
		( double ) this->rowToChunkMapping[ i ] ) );
		}
		TNL_ASSERT( maxChunkInSlice > 0,
		std::cerr << " maxChunkInSlice = " << maxChunkInSlice << std::endl );
		@@ -314,7 +314,22 @@ template< typename Real,
		Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
		{
		ConstMatrixRow matrixRow = getRow( row );
		return matrixRow.getNonZeroElementsCount();
		return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );

		// IndexType elementCount ( 0 );
		// ConstMatrixRow matrixRow = this->getRow( row );
		//
		// auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable
		// {
		// std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl;
		// if( matrixRow.getElementValue( i ) != 0.0 )
		// elementCount++;
		//
		// std::cout << "End of lambda elementCount = " << elementCount << std::endl;
		// };
		//
		// ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros );
		// return elementCount;
		}

		template< typename Real,

src/TNL/Matrices/Ellpack_impl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -129,7 +129,7 @@ template< typename Real,
		Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
		{
		ConstMatrixRow matrixRow = getRow( row );
		return matrixRow.getNonZeroElementsCount();
		return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
		}

		template< typename Real,

src/TNL/Matrices/SlicedEllpack_impl.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -128,7 +128,7 @@ template< typename Real,
		Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const
		{
		ConstMatrixRow matrixRow = getRow( row );
		return matrixRow.getNonZeroElementsCount();
		return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
		}

		template< typename Real,

src/TNL/Matrices/SparseRow.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -53,7 +53,7 @@ class SparseRow
		Index getLength() const;

		__cuda_callable__
		Index getNonZeroElementsCount() const;
		Index getNonZeroElementsCount( TNL::String deviceType ) const;

		void print( std::ostream& str ) const;