Commit 032d4ec0 authored by Illia Kolesnik's avatar Illia Kolesnik Committed by Tomáš Oberhuber
Browse files

Moved changes from matrices-csr to IK/matrices-csr

parent 323b4db2
Loading
Loading
Loading
Loading
+19 −1
Original line number Diff line number Diff line
@@ -231,6 +231,24 @@ public:
   void vectorProductCuda( const InVector& inVector,
                           OutVector& outVector,
                           int gridIdx ) const;
   
   // LightSpMV-style SpMV kernel body: small groups of warp lanes pull rows
   // dynamically from an atomic row counter, balancing load across rows of
   // irregular length. Device-only; invoked from vectorProductCuda with the
   // same template arguments.
   // NOTE(review): gridIdx does not appear to be used by the definition in
   // this commit — confirm whether grid striding is still intended here.
   template< typename InVector,
             typename OutVector,
             int warpSize > 
   __device__
   void spmvCudaLightSpmv( const InVector& inVector,
                            OutVector& outVector,
                            int gridIdx) const;
   
   // CSR-vector SpMV kernel body: one warp per matrix row; lanes stride over
   // the row's nonzeros and combine partial sums with a warp-level reduction.
   // Device-only; invoked from vectorProductCuda with the same template
   // arguments.
   // NOTE(review): gridIdx does not appear to be used by the definition in
   // this commit — rows beyond one grid's thread capacity would be skipped.
   template< typename InVector,
             typename OutVector,
             int warpSize > 
   __device__
   void spmvCudaVector( const InVector& inVector,
                        OutVector& outVector,
                        int gridIdx) const;

   
#endif

   // The following getters allow us to interface TNL with external C-like
+108 −0
Original line number Diff line number Diff line
@@ -671,6 +671,111 @@ Index CSR< Real, Device, Index >::getHybridModeSplit() const
}

#ifdef HAVE_CUDA

template< typename Real,
          typename Device,
          typename Index >
   template< typename InVector,
             typename OutVector,
             int warpSize >
__device__
void CSR< Real, Device, Index >::spmvCudaLightSpmv( const InVector& inVector,
                                                      OutVector& outVector,
                                                      int gridIdx) const
{
  // LightSpMV-style SpMV: groups of 'elemPerThread' consecutive lanes claim
  // rows dynamically through an atomic counter, so load stays balanced even
  // when row lengths vary widely.
  //
  // NOTE(review): 'rowCnt' is __shared__, i.e. per-block — every launched
  // block walks ALL rows and writes identical results. That is redundant
  // (though not incorrect) work; confirm the intended launch configuration
  // or move the counter to global memory.
  // NOTE(review): 'gridIdx' is unused here, unlike in vectorProductCuda.
  const IndexType index = blockIdx.x * blockDim.x + threadIdx.x;
  const IndexType elemPerThread   = 4;                 // lanes cooperating per row
  const IndexType laneID      = index % 32;            // lane within the warp
  const IndexType groupID     = laneID / elemPerThread;
  const IndexType inGroupID   = laneID % elemPerThread;

  // All 32 lanes of the warp take part in every shuffle below, so the
  // participation mask is the full-warp mask. (The previous mask,
  // (unsigned)(warpSize - 1) == 0x1f, named only lanes 0-4.)
  constexpr unsigned fullMask = 0xffffffff;

  IndexType row, minID, column, maxID, idxMtx;
  __shared__ unsigned rowCnt;

  // Fix: initialize the per-block counter in EVERY block. The previous
  // 'index == 0' guard only ran in block 0, leaving rowCnt uninitialized
  // in all other blocks.
  if (threadIdx.x == 0) rowCnt = 0;
  __syncthreads();

  while (true) {

    /* The first lane of each group claims the next unprocessed row. */
    if (inGroupID == 0) row = atomicAdd(&rowCnt, 1);

    /* Broadcast the claimed row from the group's first lane to the group. */
    row = __shfl_sync(fullMask, row, groupID * elemPerThread);

    const bool rowValid = (row < this->rowPointers.getSize() - 1);

    /* Fix: exit only when the WHOLE warp has run out of rows. A per-group
       early return would remove lanes from the full-mask shuffles that the
       remaining lanes still execute — undefined behavior under independent
       thread scheduling. */
    if (__all_sync(fullMask, !rowValid))
      return;

    Real result = 0.0;

    if (rowValid) {
      minID = this->rowPointers[row];
      maxID = this->rowPointers[row + 1];

      /* Strided accumulation: lane k of the group handles elements
         minID+k, minID+k+elemPerThread, ... */
      idxMtx = minID + inGroupID;
      while (idxMtx < maxID) {
        column = this->columnIndexes[idxMtx];
        if (column >= this->getColumns()) {
          // padding entry marks the end of the row's stored elements
          break;
        }
        result += this->values[idxMtx] * inVector[column];
        idxMtx += elemPerThread;
      }
    }

    /* Tree reduction inside each elemPerThread-lane group. Shuffles that
       cross a group boundary only pollute lanes that never write a result;
       lanes on exhausted rows contribute 0 and write nothing. */
    for (IndexType i = elemPerThread / 2; i > 0; i /= 2)
      result += __shfl_down_sync(fullMask, result, i);

    /* The group's first lane now holds the complete row sum. */
    if (rowValid && inGroupID == 0) {
      outVector[row] = result;
    }
  }
}

template< typename Real,
          typename Device,
          typename Index >
   template< typename InVector,
             typename OutVector,
             int warpSize >
__device__
void CSR< Real, Device, Index >::spmvCudaVector( const InVector& inVector,
                                                   OutVector& outVector,
                                                   int gridIdx) const
{
  // CSR-vector SpMV: one warp per row. Lanes stride over the row's stored
  // elements and combine their partial sums with a warp shuffle reduction.
  // NOTE(review): 'gridIdx' is unused — rows beyond one grid's thread
  // capacity are not processed here; confirm against vectorProductCuda's
  // grid-striding scheme.
  const IndexType index  = blockIdx.x * blockDim.x + threadIdx.x;
  const IndexType laneID = index % warpSize;
  const IndexType row    = index / warpSize; // warpID - one warp per row

  // 'volatile' removed: it is a relic of shared-memory warp-synchronous
  // reductions and has no meaning for a register reduced via *_sync shuffles.
  Real result = 0.0;

  // Every lane of a warp computes the same 'row', so the whole warp takes
  // this branch together and the full-mask shuffles below stay safe.
  if (row >= this->rowPointers.getSize() - 1)
    return;

  const IndexType minID = this->rowPointers[row];
  const IndexType maxID = this->rowPointers[row + 1];

  /* Accumulate this lane's share of the row. */
  for (IndexType idxMtx = minID + laneID; idxMtx < maxID; idxMtx += warpSize) {
    const IndexType column = this->columnIndexes[idxMtx];
    if (column >= this->getColumns()) {
      // padding entry marks the end of the row's stored elements
      break;
    }
    result += this->values[idxMtx] * inVector[column];
  }

  /* Warp tree reduction. Fixes: the participation mask must name all lanes
     taking part (0xffffffff), not (warpSize - 1) which covered only lanes
     0-4; and the offsets now follow the warpSize template parameter instead
     of hard-coded 32-lane steps (16, 8, 4, 2, 1). */
  for (int offset = warpSize / 2; offset > 0; offset /= 2)
    result += __shfl_down_sync(0xffffffff, result, offset);

  /* Lane 0 holds the complete row sum. */
  if (laneID == 0) {
    outVector[row] = result;
  }
}

template< typename Real,
          typename Device,
          typename Index >
@@ -724,6 +829,9 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector,
                                                             OutVector& outVector,
                                                             int gridIdx ) const
{
   spmvCudaLightSpmv< InVector, OutVector, warpSize >( inVector, outVector, gridIdx );
   return;

   IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   const IndexType warpStart = warpSize * ( globalIdx / warpSize );
   const IndexType warpEnd = min( warpStart + warpSize, this->getRows() );
+258 −0
Original line number Diff line number Diff line
@@ -1220,6 +1220,264 @@ void test_VectorProduct()
    EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
}

// Exercises SpMV on a 20x20 matrix mixing dense, sparse, empty, negative and
// alternating-pattern rows. The input vector is all 2s, so each expected
// output entry is simply twice the corresponding row sum.
template< typename Matrix >
void test_VectorProductLarger()
{
  using RealType = typename Matrix::RealType;
  using DeviceType = typename Matrix::DeviceType;
  using IndexType = typename Matrix::IndexType;
    
/*
 * Sets up the following 20x20 sparse matrix:
 *
 *          0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19
 *         ------------------------------------------------------------------------------ 
 *   0   /  1   0   0   0   0   0   0   0   0   0   7   7   7   7   7   7   7   7   7   7 \
 *   1   |  0   2   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 |
 *   2   |  0   0   3   3   3   0   0   0   0   0   0   0   0   0   0  -2   0   0   0   0 |
 *   3   |  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 |
 *   4   |  0  10   0  11   0  12   0   0   0   0   0   0   0   0   0   0   0 -48 -49 -50 |
 *   5   |  4   0   0   0   0   4   0   0   0   0   4   0   0   0   0   4   0   0   0   0 |
 *   6   |  0   5   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 |
 *   7   |  0   0   6   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  -6 |
 *   8   |  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 |
 *   9   |  1   2   3   4   5   6   7   8   9   0   0  -1  -2  -3  -4  -5  -6  -7  -8  -9 |
 *  10   |  7   0   0   0   3   3   3   3   0   0   0   0   0   0   0  -6   0   0   0   0 |
 *  11   | -1  -2  -3  -4  -5  -6  -7  -8  -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 |
 *  12   |  0   9   0   9   0   9   0   9   0   9   0   9   0   9   0   9   0   9   0   9 |
 *  13   |  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0 |
 *  14   | -1  -2  -3  -4  -5  -6  -7  -8  -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 |
 *  15   |  4   0   4   0   4   0   4   0   4   0   4   0   4   0   4   0   4   0   4   0 |
 *  16   |  0  -2   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  -2   0 |
 *  17   | -1  -2  -3  -4  -5  -6  -7  -8  -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 |
 *  18   |  0   7   0   7   0   7   0   7   0   7   0   7   0   7   0   7   0   7   0   7 |
 *  19   \  8   8   8   0   0   8   8   8   0   0   8   8   8   0   0   8   8   8   0   0 /
 */
  const IndexType m_rows = 20;
  const IndexType m_cols = 20;
  
  Matrix m;
  m.reset();
  m.setDimensions( m_rows, m_cols );
  typename Matrix::CompressedRowLengthsVector rowLengths;
  rowLengths.setSize( m_rows );
  rowLengths.setValue( 20 );
  m.setCompressedRowLengths( rowLengths );
  
  /* 0 */
  m.setElement( 0, 0, 1 );
  for ( IndexType i = 10; i < m_cols; ++i )
      m.setElement( 0, i, 7 );

  /* 1 */
  m.setElement( 1, 1, 2 );
  
  /* 2 */
  m.setElement( 2, 2, 3 );
  m.setElement( 2, 3, 3 );
  m.setElement( 2, 4, 3 );
  m.setElement( 2, 15, -2 );

  /* 4 */
  // Rows 3 and 13 are deliberately left empty (all zeros).
  m.setElement( 4, 1, 10 );
  m.setElement( 4, 3, 11 );
  m.setElement( 4, 5, 12 );
  m.setElement( 4, 17, -48 );
  m.setElement( 4, 18, -49 );
  m.setElement( 4, 19, -50 );

  /* 5 */
  for ( IndexType i = 0; i < m_cols; i += 5 )
      m.setElement( 5, i, 4 );

  /* 6 */
  m.setElement( 6, 1, 5 );

  /* 7 */
  m.setElement( 7, 2, 6 );
  m.setElement( 7, 19, -6 );

  /* 8 */
  for ( IndexType i = 1; i <= m_cols; ++i )
      m.setElement( 8, i - 1, i );

  /* 9 */
  for ( IndexType i = 1; i <= 9; ++i )
      m.setElement( 9, i - 1, i );

  for ( IndexType i = 11; i <= 19; ++i )
      m.setElement( 9, i, -(i - 10) );

  /* 10 */
  m.setElement( 10, 0, 7 );
  m.setElement( 10, 4, 3 );
  m.setElement( 10, 5, 3 );
  m.setElement( 10, 6, 3 );
  m.setElement( 10, 7, 3 );
  m.setElement( 10, 15, -6 );

  /* 11 && 14 && 17 */
  for (IndexType i = 1; i <= m_cols; ++i) {
      m.setElement(11, i - 1, -i);
      m.setElement(14, i - 1, -i);
      m.setElement(17, i - 1, -i);
  }
  
  /* 12 && 18 */
  for (IndexType i = 0; i < m_cols; ++i) {
      if( i % 2 ) {
          m.setElement(12, i, 9);
          m.setElement(18, i, 7);
      }
  }

  /* 15 */
  for (IndexType i = 0; i < m_cols; ++i)
    if (i % 2 == 0) 
      m.setElement(15, i, 4);

  /* 16 */
  m.setElement(16, 1, -2);
  m.setElement(16, 18, -2);

  /* 19 */
  for (IndexType i = 0; i < m_cols; i += 5 ) {
      m.setElement(19, i, 8);
      m.setElement(19, i + 1, 8);
      m.setElement(19, i + 2, 8);
  }

  using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
  
  // Multiply by the all-2s vector; each result entry is 2 * (row sum).
  VectorType inVector;
  inVector.setSize( 20 );
  for( IndexType i = 0; i < inVector.getSize(); i++ )        
      inVector.setElement( i, 2 );

  VectorType outVector;  
  outVector.setSize( 20 );
  for( IndexType j = 0; j < outVector.getSize(); j++ )
      outVector.setElement( j, 0 );

  m.vectorProduct( inVector, outVector);
  
  // Expected values are 2 * (row sum) of the matrix diagrammed above,
  // e.g. row 0: 2 * (1 + 10*7) = 142; empty rows give 0.
  EXPECT_EQ( outVector.getElement( 0 ), 142 );
  EXPECT_EQ( outVector.getElement( 1 ), 4 );
  EXPECT_EQ( outVector.getElement( 2 ), 14 );
  EXPECT_EQ( outVector.getElement( 3 ), 0 );
  EXPECT_EQ( outVector.getElement( 4 ), -228 );
  EXPECT_EQ( outVector.getElement( 5 ), 32 );
  EXPECT_EQ( outVector.getElement( 6 ), 10 );
  EXPECT_EQ( outVector.getElement( 7 ), 0 );
  EXPECT_EQ( outVector.getElement( 8 ), 420 );
  EXPECT_EQ( outVector.getElement( 9 ), 0 );
  EXPECT_EQ( outVector.getElement( 10 ), 26 );
  EXPECT_EQ( outVector.getElement( 11 ), -420 );
  EXPECT_EQ( outVector.getElement( 12 ), 180 );
  EXPECT_EQ( outVector.getElement( 13 ), 0 );
  EXPECT_EQ( outVector.getElement( 14 ), -420 );
  EXPECT_EQ( outVector.getElement( 15 ), 80 );
  EXPECT_EQ( outVector.getElement( 16 ), -8 );
  EXPECT_EQ( outVector.getElement( 17 ), -420 );
  EXPECT_EQ( outVector.getElement( 18 ), 140 );
  EXPECT_EQ( outVector.getElement( 19 ), 192 );
}

// Exercises SpMV on a larger NxN matrix whose pattern repeats every 10 rows.
// The input vector is all 2s, so each expected entry is twice the row sum.
template< typename Matrix >
void test_VectorProductGiant()
{
  using RealType = typename Matrix::RealType;
  using DeviceType = typename Matrix::DeviceType;
  using IndexType = typename Matrix::IndexType;
  const IndexType N = 200; // Should be a multiple of 10
/*
 * Sets up the following NxN sparse matrix (diagram fixed to match the code
 * below: the nonzeros of rows 0, 2 and the +3s of row 4 sit at ODD columns):
 *
 *    /  0   1   0   1 ... 0  1 \   row % 10 == 0: 1 at every odd column
 *    |  0   0   0   0 ... 0  0 |
 *    |  0   2   0   2 ... 0  2 |   row % 10 == 2: 2 at every odd column
 *    |  0   0   0   0 ... 0  0 |
 *    | -3   3  -3   3 ...-3  3 |   row % 10 == 4: 3 at odd, -3 at even
 *    |  0   0   0   0 ... 0  0 |
 *    |  5   5   5   5 ... 5  5 |   row % 10 == 6: dense row of 5s
 *    |  0   0   0   0 ... 0  0 |
 *    |  0   0   0   0 ... 0  0 |
 *    |  2   0   0   0 ... 0  0 |   row % 10 == 9: the only 2 in the row
 *
 *       ..................
 */
    
  const IndexType m_rows = N;
  const IndexType m_cols = N;
  
  Matrix m;
  m.reset();
  m.setDimensions( m_rows, m_cols );
  typename Matrix::CompressedRowLengthsVector rowLengths;
  rowLengths.setSize( m_rows );
  rowLengths.setValue( m_cols );
  m.setCompressedRowLengths( rowLengths );
  
  for ( IndexType row = 0; row < m_rows; ++row ) {
      if (row % 10 == 0) {
          for ( IndexType col = 0; col < m_cols; ++col ) {
              if (col % 2)
                  m.setElement( row, col, 1 );
          }
      }
      else if (row % 10 == 2) {
          for ( IndexType col = 0; col < m_cols; ++col ) {
              if (col % 2 == 1)
                  m.setElement( row, col, 2 );
          }
      }
      else if (row % 10 == 4) {
          for ( IndexType col = 0; col < m_cols; ++col ) {
              if (col % 2)
                  m.setElement( row, col, 3 );
              else
                  m.setElement( row, col, -3 );
          }
      }
      else if (row % 10 == 6) {
          for ( IndexType col = 0; col < m_cols; ++col ) {
              m.setElement( row, col, 5 );
          }
      }
      else if (row % 10 == 9) {
          m.setElement( row, 0, 2 );
      }
  }

  using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
  
  VectorType inVector;
  inVector.setSize( N );
  for( IndexType i = 0; i < inVector.getSize(); i++ )        
      inVector.setElement( i, 2 );

  VectorType outVector;  
  outVector.setSize( N );
  for( IndexType j = 0; j < outVector.getSize(); j++ )
      outVector.setElement( j, 0 );

  
  m.vectorProduct( inVector, outVector);
  
  // Fix: expectations are expressed in terms of N (instead of hard-coded
  // 200 / 400 / 2000) so the test stays valid when N is changed, as the
  // comment on N promises.
  for ( IndexType i = 0; i < outVector.getSize(); ++i ) {
      if ( i % 10 == 0 )
          EXPECT_EQ( outVector.getElement( i ), N );      // (N/2) ones * 2
      else if ( i % 10 == 2 )
          EXPECT_EQ( outVector.getElement( i ), 2 * N );  // (N/2) twos * 2
      else if ( i % 10 == 6 )
          EXPECT_EQ( outVector.getElement( i ), 10 * N ); // N fives * 2
      else if ( i % 10 == 9 )
          EXPECT_EQ( outVector.getElement( i ), 4 );      // single 2 * 2
      else 
          EXPECT_EQ( outVector.getElement( i ), 0 );      // empty rows, and
                                                          // row%10==4 sums to 0
  }
}

template< typename Matrix >
void test_RowsReduction()
{
+14 −0
Original line number Diff line number Diff line
@@ -112,6 +112,20 @@ TYPED_TEST( CSRMatrixTest, vectorProductTest )
    test_VectorProduct< CSRMatrixType >();
}

// Registers the 20x20 mixed-pattern SpMV test for every CSR matrix type in
// the fixture's type list.
TYPED_TEST( CSRMatrixTest, vectorProductLargerTest )
{
    using CSRMatrixType = typename TestFixture::CSRMatrixType;

    test_VectorProductLarger< CSRMatrixType >();
}

// Registers the NxN repeating-pattern SpMV test for every CSR matrix type in
// the fixture's type list.
TYPED_TEST( CSRMatrixTest, vectorProductGiantTest )
{
    using CSRMatrixType = typename TestFixture::CSRMatrixType;

    test_VectorProductGiant< CSRMatrixType >();
}

TYPED_TEST( CSRMatrixTest, saveAndLoadTest )
{
    using CSRMatrixType = typename TestFixture::CSRMatrixType;