Loading tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h +84 −18 Original line number Diff line number Diff line Loading @@ -153,6 +153,59 @@ makeSnapshot( const RealType& time, return true; } #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( Real* u, Real* fu, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) { fu[ j * gridXSize ] = 0.0; u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; } if( i == gridXSize - 1 && j < gridYSize ) { fu[ j * gridXSize + gridYSize - 1 ] = 0.0; u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } if( j == 0 && i > 0 && i < gridXSize - 1 ) { fu[ i ] = 0.0; //u[ j * gridXSize + 1 ]; u[ i ] = 0.0; //u[ j * gridXSize + 1 ]; } if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 ) { fu[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } } template< typename Real, typename Index > __global__ void heatEquationKernel( const Real* u, Real* fu, const Real tau, const Real hx_inv, const Real hy_inv, const Index gridXSize, const Index gridYSize ) { const Index i = blockIdx.x * blockDim.x + threadIdx.x; const Index j = blockIdx.y * blockDim.y + threadIdx.y; if( i > 0 && i < gridXSize - 1 && j > 0 && j < gridYSize - 1 ) { const Index c = j * gridXSize + i; fu[ c ] = ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv; } } #endif template< typename Mesh, typename BoundaryCondition, typename RightHandSide, Loading Loading @@ -200,24 +253,37 @@ getExplicitRHS( const RealType& time, ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv ); } } if( std::is_same< DeviceType, tnlCuda >::value ) { const IndexType gridXSize = mesh.getDimensions().x(); const IndexType gridYSize = mesh.getDimensions().y(); const RealType& hx_inv = mesh.template getSpaceStepsProducts< -2, 0 >(); const RealType& hy_inv = mesh.template getSpaceStepsProducts< 0, -2 >(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); /*this->bindDofs( mesh, _u ); tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater; MeshFunctionType u( mesh, _u ); MeshFunctionType fu( mesh, _fu ); explicitUpdater.template update< typename Mesh::Cell >( time, mesh, this->differentialOperator, this->boundaryCondition, this->rightHandSide, u, fu ); tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter; boundaryConditionsSetter.template apply< typename Mesh::Cell >( this->boundaryCondition, time + tau, u ); */ int cudaErr; boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( u.getData(), fu.getData(), gridXSize, gridYSize ); if( ( cudaErr = cudaGetLastError() ) != cudaSuccess ) { cerr << "Setting of boundary conditions failed. " << cudaErr << endl; return; } /**** * Laplace operator */ //cout << "Laplace operator ... " << endl; heatEquationKernel<<< cudaGridSize, cudaBlockSize >>> ( u.getData(), fu.getData(), tau, hx_inv, hy_inv, gridXSize, gridYSize ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Laplace operator failed." << endl; return; } } } template< typename Mesh, Loading tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h +16 −5 Original line number Diff line number Diff line Loading @@ -36,21 +36,32 @@ struct Data #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( const Real* u, Real* aux, __global__ void boundaryConditionsKernel( Real* u, Real* aux, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; { aux[ j * gridXSize ] = 0.0; u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; } if( i == gridXSize - 1 && j < gridYSize ) aux[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; { aux[ j * gridXSize + gridYSize - 1 ] = 0.0; u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } if( j == 0 && i > 0 && i < gridXSize - 1 ) { aux[ i ] = 0.0; //u[ j * gridXSize + 1 ]; u[ i ] = 0.0; //u[ j * gridXSize + 1 ]; } if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 ) { aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } } Loading tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h +16 −11 Original line number Diff line number Diff line Loading @@ -222,7 +222,9 @@ bool writeFunction( const Index xSize, const Index ySize, const Real& hx, const Real& hy ) const Real& hy, const Real& originX, const Real& originY ) { fstream file; file.open( fileName, ios::out ); Loading @@ -234,7 +236,7 @@ bool writeFunction( for( Index i = 0; i < xSize; i++ ) { for( Index j = 0; j < ySize; j++ ) file << i * hx << " " << j * hy << " " << data[ j * xSize + i ] << endl; file << i * hx - originX << " " << j * hy - originY << " " << data[ j * xSize + i ] << endl; file << endl; } } Loading Loading @@ -308,8 +310,11 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, cerr << "Allocation failed. " << cudaErr << endl; return false; } //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy ); tnlVector< Real, tnlCuda, Index > vecAux; vecAux.bind( cuda_aux, gridXSize * gridYSize ); vecAux.setValue( 0.0 ); cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); /**** * Explicit Euler solver Loading Loading @@ -340,9 +345,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, return false; computationTimer.stop(); //cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy ); //getchar(); /*cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); getchar();*/ updateTimer.start(); /**** Loading @@ -355,9 +360,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, cerr << "Update failed." << endl; return false; } //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "u", aux, gridXSize, gridYSize, hx, hy ); //getchar(); /*cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); getchar();*/ cudaThreadSynchronize(); Loading @@ -382,7 +387,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, } timer.stop(); cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); /**** * Saving the result Loading Loading
tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h +84 −18 Original line number Diff line number Diff line Loading @@ -153,6 +153,59 @@ makeSnapshot( const RealType& time, return true; } #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( Real* u, Real* fu, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) { fu[ j * gridXSize ] = 0.0; u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; } if( i == gridXSize - 1 && j < gridYSize ) { fu[ j * gridXSize + gridYSize - 1 ] = 0.0; u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } if( j == 0 && i > 0 && i < gridXSize - 1 ) { fu[ i ] = 0.0; //u[ j * gridXSize + 1 ]; u[ i ] = 0.0; //u[ j * gridXSize + 1 ]; } if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 ) { fu[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } } template< typename Real, typename Index > __global__ void heatEquationKernel( const Real* u, Real* fu, const Real tau, const Real hx_inv, const Real hy_inv, const Index gridXSize, const Index gridYSize ) { const Index i = blockIdx.x * blockDim.x + threadIdx.x; const Index j = blockIdx.y * blockDim.y + threadIdx.y; if( i > 0 && i < gridXSize - 1 && j > 0 && j < gridYSize - 1 ) { const Index c = j * gridXSize + i; fu[ c ] = ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv; } } #endif template< typename Mesh, typename BoundaryCondition, typename RightHandSide, Loading Loading @@ -200,24 +253,37 @@ getExplicitRHS( const RealType& time, ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv ); } } if( std::is_same< DeviceType, tnlCuda >::value ) { const IndexType gridXSize = mesh.getDimensions().x(); const IndexType gridYSize = mesh.getDimensions().y(); const RealType& hx_inv = mesh.template getSpaceStepsProducts< -2, 0 >(); const RealType& hy_inv = mesh.template getSpaceStepsProducts< 0, -2 >(); dim3 cudaBlockSize( 16, 16 ); dim3 cudaGridSize( gridXSize / 16 + ( gridXSize % 16 != 0 ), gridYSize / 16 + ( gridYSize % 16 != 0 ) ); /*this->bindDofs( mesh, _u ); tnlExplicitUpdater< Mesh, MeshFunctionType, DifferentialOperator, BoundaryCondition, RightHandSide > explicitUpdater; MeshFunctionType u( mesh, _u ); MeshFunctionType fu( mesh, _fu ); explicitUpdater.template update< typename Mesh::Cell >( time, mesh, this->differentialOperator, this->boundaryCondition, this->rightHandSide, u, fu ); tnlBoundaryConditionsSetter< MeshFunctionType, BoundaryCondition > boundaryConditionsSetter; boundaryConditionsSetter.template apply< typename Mesh::Cell >( this->boundaryCondition, time + tau, u ); */ int cudaErr; boundaryConditionsKernel<<< cudaGridSize, cudaBlockSize >>>( u.getData(), fu.getData(), gridXSize, gridYSize ); if( ( cudaErr = cudaGetLastError() ) != cudaSuccess ) { cerr << "Setting of boundary conditions failed. " << cudaErr << endl; return; } /**** * Laplace operator */ //cout << "Laplace operator ... " << endl; heatEquationKernel<<< cudaGridSize, cudaBlockSize >>> ( u.getData(), fu.getData(), tau, hx_inv, hy_inv, gridXSize, gridYSize ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Laplace operator failed." << endl; return; } } } template< typename Mesh, Loading
tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h +16 −5 Original line number Diff line number Diff line Loading @@ -36,21 +36,32 @@ struct Data #ifdef HAVE_CUDA template< typename Real, typename Index > __global__ void boundaryConditionsKernel( const Real* u, Real* aux, __global__ void boundaryConditionsKernel( Real* u, Real* aux, const Index gridXSize, const Index gridYSize ) { const Index i = ( blockIdx.x ) * blockDim.x + threadIdx.x; const Index j = ( blockIdx.y ) * blockDim.y + threadIdx.y; if( i == 0 && j < gridYSize ) aux[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; { aux[ j * gridXSize ] = 0.0; u[ j * gridXSize ] = 0.0; //u[ j * gridXSize + 1 ]; } if( i == gridXSize - 1 && j < gridYSize ) aux[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; { aux[ j * gridXSize + gridYSize - 1 ] = 0.0; u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } if( j == 0 && i > 0 && i < gridXSize - 1 ) { aux[ i ] = 0.0; //u[ j * gridXSize + 1 ]; u[ i ] = 0.0; //u[ j * gridXSize + 1 ]; } if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 ) { aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } } Loading
tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h +16 −11 Original line number Diff line number Diff line Loading @@ -222,7 +222,9 @@ bool writeFunction( const Index xSize, const Index ySize, const Real& hx, const Real& hy ) const Real& hy, const Real& originX, const Real& originY ) { fstream file; file.open( fileName, ios::out ); Loading @@ -234,7 +236,7 @@ bool writeFunction( for( Index i = 0; i < xSize; i++ ) { for( Index j = 0; j < ySize; j++ ) file << i * hx << " " << j * hy << " " << data[ j * xSize + i ] << endl; file << i * hx - originX << " " << j * hy - originY << " " << data[ j * xSize + i ] << endl; file << endl; } } Loading Loading @@ -308,8 +310,11 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, cerr << "Allocation failed. " << cudaErr << endl; return false; } //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy ); tnlVector< Real, tnlCuda, Index > vecAux; vecAux.bind( cuda_aux, gridXSize * gridYSize ); vecAux.setValue( 0.0 ); cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "initial", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); /**** * Explicit Euler solver Loading Loading @@ -340,9 +345,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, return false; computationTimer.stop(); //cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy ); //getchar(); /*cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); getchar();*/ updateTimer.start(); /**** Loading @@ -355,9 +360,9 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, cerr << "Update failed." << endl; return false; } //cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "u", aux, gridXSize, gridYSize, hx, hy ); //getchar(); /*cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); getchar();*/ cudaThreadSynchronize(); Loading @@ -382,7 +387,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, } timer.stop(); cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); /**** * Saving the result Loading