Implementing CUDA benchmark. (f2b7e727) · Commits · TNL / tnl-dev

tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h

+84 −18

Original line number	Diff line number	Diff line
		@@ -153,6 +153,59 @@ makeSnapshot( const RealType& time,
		return true;
		}

		#ifdef HAVE_CUDA

		template< typename Real, typename Index >
		__global__ void boundaryConditionsKernel( Real* u,
		Real* fu,
		const Index gridXSize, const Index gridYSize )
		{
		const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x;
		const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y;
		if( i == 0 && j < gridYSize )
		{
		fu[ j * gridXSize ] = 0.0;
		u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
		}
		if( i == gridXSize - 1 && j < gridYSize )
		{
		fu[ j * gridXSize + gridYSize - 1 ] = 0.0;
		u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		}
		if( j == 0 && i > 0 && i < gridXSize - 1 )
		{
		fu[ i ] = 0.0; //u[ j * gridXSize + 1 ];
		u[ i ] = 0.0; //u[ j * gridXSize + 1 ];
		}
		if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 )
		{
		fu[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		}
		}


		template< typename Real, typename Index >
		__global__ void heatEquationKernel( const Real* u,
		Real* fu,
		const Real tau,
		const Real hx_inv,
		const Real hy_inv,
		const Index gridXSize,
		const Index gridYSize )
		{
		const Index i = blockIdx.x * blockDim.x + threadIdx.x;
		const Index j = blockIdx.y * blockDim.y + threadIdx.y;
		if( i > 0 && i < gridXSize - 1 &&
		j > 0 && j < gridYSize - 1 )
		{
		const Index c = j * gridXSize + i;
		fu[ c ] = ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv +
		( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv;
		}
		}
		#endif

		template< typename Mesh,
		typename BoundaryCondition,
		typename RightHandSide,
		@@ -200,24 +253,37 @@ getExplicitRHS( const RealType& time,
		( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv );
		}
		}
		if( std::is_same< DeviceType, tnlCuda >::value )
		{
		const IndexType gridXSize = mesh.getDimensions().x();
		const IndexType gridYSize = mesh.getDimensions().y();
		const RealType& hx_inv = mesh.template getSpaceStepsProducts< -2, 0 >();
		const RealType& hy_inv = mesh.template getSpaceStepsProducts< 0, -2 >();

		dim3 cudaBlockSize( 16, 16 );
		dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ),
		gridYSize / 16 + ( gridYSize % 16 != 0 ) );

		/*this->bindDofs( mesh, _u );
		tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater;
		MeshFunctionType u( mesh, _u );
		MeshFunctionType fu( mesh, _fu );
		explicitUpdater.template update< typename Mesh::Cell >( time,
		mesh,
		this->differentialOperator,
		this->boundaryCondition,
		this->rightHandSide,
		u,
		fu );
		tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter;
		boundaryConditionsSetter.template apply< typename Mesh::Cell >(
		this->boundaryCondition,
		time + tau,
		u ); */
		int cudaErr;
		boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( u.getData(), fu.getData(), gridXSize, gridYSize );
		if( ( cudaErr = cudaGetLastError() ) != cudaSuccess )
		{
		cerr << "Setting of boundary conditions failed. " << cudaErr << endl;
		return;
		}

		/****
		* Laplace operator
		*/
		//cout << "Laplace operator ... " << endl;
		heatEquationKernel<<< cudaGridSize, cudaBlockSize >>>
		( u.getData(), fu.getData(), tau, hx_inv, hy_inv, gridXSize, gridYSize );
		if( cudaGetLastError() != cudaSuccess )
		{
		cerr << "Laplace operator failed." << endl;
		return;
		}
		}
		}

		template< typename Mesh,

tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h

+16 −5

Original line number	Diff line number	Diff line
		@@ -36,21 +36,32 @@ struct Data
		#ifdef HAVE_CUDA

		template< typename Real, typename Index >
		__global__ void boundaryConditionsKernel( const Real* u, Real* aux,
		__global__ void boundaryConditionsKernel( Real* u,
		Real* aux,
		const Index gridXSize, const Index gridYSize )
		{
		const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x;
		const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y;
		if( i == 0 && j < gridYSize )
		aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
		{
		aux[ j * gridXSize ] = 0.0;
		u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ];
		}
		if( i == gridXSize - 1 && j < gridYSize )
		aux[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		{
		aux[ j * gridXSize + gridYSize - 1 ] = 0.0;
		u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		}
		if( j == 0 && i > 0 && i < gridXSize - 1 )
		{
		aux[ i ] = 0.0; //u[ j * gridXSize + 1 ];
		u[ i ] = 0.0; //u[ j * gridXSize + 1 ];
		}
		if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 )
		{
		aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];


		u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ];
		}
		}

tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h

+16 −11

Original line number	Diff line number	Diff line
		@@ -222,7 +222,9 @@ bool writeFunction(
		const Index xSize,
		const Index ySize,
		const Real& hx,
		const Real& hy )
		const Real& hy,
		const Real& originX,
		const Real& originY )
		{
		fstream file;
		file.open( fileName, ios::out );
		@@ -234,7 +236,7 @@ bool writeFunction(
		for( Index i = 0; i < xSize; i++ )
		{
		for( Index j = 0; j < ySize; j++ )
		file << i * hx << " " << j * hy << " " << data[ j * xSize + i ] << endl;
		file << i * hx - originX << " " << j * hy - originY << " " << data[ j * xSize + i ] << endl;
		file << endl;
		}
		}
		@@ -308,8 +310,11 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
		cerr << "Allocation failed. " << cudaErr << endl;
		return false;
		}
		//cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
		//writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy );
		tnlVector< Real, tnlCuda, Index > vecAux;
		vecAux.bind( cuda_aux, gridXSize * gridYSize );
		vecAux.setValue( 0.0 );
		cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
		writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );

		/****
		* Explicit Euler solver
		@@ -340,9 +345,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
		return false;
		computationTimer.stop();

		//cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
		//writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy );
		//getchar();
		/cudaMemcpy( aux, cuda_aux, dofsCount sizeof( Real ), cudaMemcpyDeviceToHost );
		writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
		getchar();*/

		updateTimer.start();
		/****
		@@ -355,9 +360,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
		cerr << "Update failed." << endl;
		return false;
		}
		//cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
		//writeFunction( "u", aux, gridXSize, gridYSize, hx, hy );
		//getchar();
		/cudaMemcpy( aux, cuda_u, dofsCount sizeof( Real ), cudaMemcpyDeviceToHost );
		writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );
		getchar();*/


		cudaThreadSynchronize();
		@@ -382,7 +387,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters,
		}
		timer.stop();
		cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost );
		writeFunction( "final", u, gridXSize, gridYSize, hx, hy );
		writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 );

		/****
		* Saving the result