Tuning Ellpack formats. (994ee56d) · Commits · TNL / tnl-dev

src/matrices/tnlEllpackMatrix_impl.h

+14 −4

Original line number	Diff line number	Diff line
		@@ -687,11 +687,21 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost >
		const InVector& inVector,
		OutVector& outVector )
		{
		-#ifdef HAVE_OPENMP
		-#pragma omp parallel for
		-#endif
		+//#ifdef HAVE_OPENMP
		+//#pragma omp parallel for
		+//#endif
		+// for( Index row = 0; row < matrix.getRows(); row ++ )
		+// outVector[ row ] = matrix.rowVectorProduct( row, inVector );
		+Index col;
		for( Index row = 0; row < matrix.getRows(); row ++ )
		-outVector[ row ] = matrix.rowVectorProduct( row, inVector );
		+{
		+outVector[ row ] = 0.0;
		+const Index rowEnd = ( row + 1 ) * matrix.rowLengths;
		+for( Index i = row * matrix.rowLengths; i < rowEnd; i++ )
		+if( ( col = matrix.columnIndexes[ i ] ) < matrix.columns )
		+outVector[ row ] += matrix.values[ i ] * inVector[ col ];
		+}

		}
		};

+5 −4

Original line number	Diff line number	Diff line
		@@ -252,6 +252,7 @@ int main( int argc, char* argv[] )
		int elements( 0 );
		for( int row = 0; row < size; row++ )
		{
		if( row % 100 == 0 )
		cout << "Row " << row << "/" << size << " \r" << flush;
		int col = Max( 0, row - elementsPerRow / 2 );
		for( int element = 0; element < elementsPerRow; element++ )
		@@ -266,7 +267,7 @@ int main( int argc, char* argv[] )
		}
		cout << endl;
		setCudaTestMatrix< DeviceMatrix >( deviceMatrix, elementsPerRow );
		-datasetSize = loops * elements * sizeof( double ) / oneGB;
		+datasetSize = loops * elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;
		hostVector.setValue( 1.0 );
		deviceVector.setValue( 1.0 );
		cout << "Benchmarking SpMV on CPU: ";
		@@ -275,7 +276,7 @@ int main( int argc, char* argv[] )
		hostMatrix.vectorProduct( hostVector, hostVector2 );
		timer.stop();
		double hostTime = timer.getTime();
		-bandwidth = 2 * datasetSize / loops / timer.getTime();
		+bandwidth = datasetSize / timer.getTime();
		cout << timer.getTime() << " => " << bandwidth << " GB/s" << endl;

		cout << "Benchmarking SpMV on GPU: ";
		@@ -293,7 +294,7 @@ int main( int argc, char* argv[] )
		// cerr << " " << i;

		}
		-bandwidth = 2 * datasetSize / loops / timer.getTime();
		+bandwidth = datasetSize / timer.getTime();
		cout << timer.getTime() << " => " << bandwidth << " GB/s" << " speedup " << hostTime / timer.getTime() << endl;

		return EXIT_SUCCESS;