diff --git a/src/TNL/Devices/Cuda.cpp b/src/TNL/Devices/Cuda.cpp index ef868455784575e3304ac4ad2416e6b856f38ce5..11301ef8687534f30202526bdfd59a06d142f530 100644 --- a/src/TNL/Devices/Cuda.cpp +++ b/src/TNL/Devices/Cuda.cpp @@ -19,6 +19,7 @@ namespace TNL { namespace Devices { SmartPointersRegister Cuda::smartPointersRegister; +Timer Cuda::smartPointersSynchronizationTimer; String Cuda::getDeviceType() { @@ -67,6 +68,8 @@ bool Cuda::setup( const Config::ParameterContainer& parameters, std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl; return false; } + smartPointersSynchronizationTimer.reset(); + smartPointersSynchronizationTimer.stop(); #endif return true; } @@ -85,7 +88,10 @@ bool Cuda::synchronizeDevice( int deviceId ) { if( deviceId < 0 ) deviceId = Devices::CudaDeviceInfo::getActiveDevice(); - return smartPointersRegister.synchronizeDevice( deviceId ); + smartPointersSynchronizationTimer.start(); + bool b = smartPointersRegister.synchronizeDevice( deviceId ); + smartPointersSynchronizationTimer.stop(); + return b; } } // namespace Devices diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 301b0091634e412522a52539563b2a8c0e6b122c..dc3923f4af70ebbf4161b8542d08b2308b5fcf4f 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -15,6 +15,7 @@ #include <TNL/String.h> #include <TNL/Assert.h> #include <TNL/SmartPointersRegister.h> +#include <TNL/Timer.h> namespace TNL { @@ -31,7 +32,6 @@ namespace Devices { #define __cuda_callable__ #endif - class Cuda { public: @@ -108,10 +108,13 @@ class Cuda // called to get the device ID. static bool synchronizeDevice( int deviceId = -1 ); + static Timer smartPointersSynchronizationTimer; + protected: static SmartPointersRegister smartPointersRegister; - + + }; #ifdef HAVE_CUDA diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h index c20e8fa39ffe917ab69511a63894b50f13aac744..84df79b9fb672432c71fad9b67817118c52546c7 100644 --- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h @@ -156,6 +156,8 @@ operator()( const PreimageFunction& u, const typename EntityType::template NeighbourEntities< 2 >& neighbourEntities = entity.getNeighbourEntities(); const RealType& hxSquareInverse = entity.getMesh().template getSpaceStepsProducts< -2, 0 >(); const RealType& hySquareInverse = entity.getMesh().template getSpaceStepsProducts< 0, -2 >(); + const IndexType c = entity.getIndex(); + const IndexType xSize = entity.getMesh().getDimensions().x(); return ( u[ neighbourEntities.template getEntityIndex< -1, 0 >() ] + u[ neighbourEntities.template getEntityIndex< 1, 0 >() ] ) * hxSquareInverse + ( u[ neighbourEntities.template getEntityIndex< 0, -1 >() ] diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h index dcb8e69a3c481bf389b7332f5276eadded6b4e88..6dc77992669e167ca200a820a87bf32be79d4178 100644 --- a/src/TNL/Problems/HeatEquationProblem_impl.h +++ b/src/TNL/Problems/HeatEquationProblem_impl.h @@ -69,8 +69,6 @@ bool HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: writeEpilog( Logger& logger ) { - logger.writeParameter< const char* >( "GPU transfer time:", "" ); - this->gpuTransferTimer.writeLog( logger, 1 ); return true; } @@ -239,9 +237,9 @@ getExplicitRHS( const RealType& time, time + tau, this->uPointer );*/ - //uPointer->write( "u.txt", "gnuplot" ); - //fuPointer->write( "fu.txt", "gnuplot" ); - //getchar(); + /*uPointer->write( "u.txt", "gnuplot" ); + fuPointer->write( "fu.txt", "gnuplot" ); + getchar();*/ } template< typename Mesh, diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h index afef48a6bfdbd3f6cd5e439765d22d2a6738d91c..2d988c8b4120920ca346dd7e8e205b43f64660f9 100644 --- a/src/TNL/Solvers/SolverStarter_impl.h +++ b/src/TNL/Solvers/SolverStarter_impl.h @@ -478,6 +478,11 @@ bool SolverStarter< ConfigTag > :: writeEpilog( std::ostream& str, const Solver& return false; logger.writeParameter< const char* >( "Compute time:", "" ); this->computeTimer.writeLog( logger, 1 ); + if( std::is_same< typename Solver::DeviceType, TNL::Devices::Cuda >::value ) + { + logger.writeParameter< const char* >( "GPU synchronization time:", "" ); + TNL::Devices::Cuda::smartPointersSynchronizationTimer.writeLog( logger, 1 ); + } logger.writeParameter< const char* >( "I/O time:", "" ); this->ioTimer.writeLog( logger, 1 ); logger.writeParameter< const char* >( "Total time:", "" ); diff --git a/src/TNL/Timer.cpp b/src/TNL/Timer.cpp index 06a50358017c631b9710d5e88b1fd093e43fc4a3..dc561202e7f6f7b759cc599edba7e37d2a1b7362 100644 --- a/src/TNL/Timer.cpp +++ b/src/TNL/Timer.cpp @@ -9,6 +9,7 @@ /* See Copyright Notice in tnl/Copyright */ #include <TNL/Timer.h> +#include <TNL/Logger.h> #include <TNL/tnlConfig.h> #ifdef HAVE_SYS_RESOURCE_H diff --git a/src/TNL/Timer.h b/src/TNL/Timer.h index 58e35e03d1636178ac6907ade662b8217447639a..5019ab46f3bf2ff0dcef2bed65d96ac9af848457 100644 --- a/src/TNL/Timer.h +++ b/src/TNL/Timer.h @@ -11,10 +11,10 @@ #pragma once -#include <TNL/Logger.h> - namespace TNL { +class Logger; + class Timer { public: diff --git a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h index e9e3b5af463addbc9b1411ce779f8616a64c0905..d074b13bc504c16e62a03df39ed7eedd8b997f83 100644 --- a/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h +++ b/tests/benchmarks/heat-equation-benchmark/HeatEquationBenchmarkProblem_impl.h @@ -354,15 +354,15 @@ heatEquationTemplatedCompact( const GridType* grid, { GridEntity entity( *grid, coordinates, entityOrientation, entityBasis ); - //entity.refresh(); - /*if( ! entity.isBoundaryEntity() ) + entity.refresh(); + if( ! entity.isBoundaryEntity() ) { fu( entity ) = ( *differentialOperator )( u, entity, time ); typedef Functions::FunctionAdapter< GridType, RightHandSide > FunctionAdapter; fu( entity ) += FunctionAdapter::getValue( *rightHandSide, entity, time ); - }*/ + } } } #endif diff --git a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h b/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h index 7f2dcfe6bd007bdafbcb634747180b91df3a73d8..3097d652f0a9928d791854c1eacb2728790dba6e 100644 --- a/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h +++ b/tests/benchmarks/heat-equation-benchmark/pure-c-rhs.h @@ -52,12 +52,12 @@ __global__ void boundaryConditionsKernel( Real* u, aux[ j * gridXSize + gridYSize - 1 ] = 0.0; u[ j * gridXSize + gridYSize - 1 ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; } - if( j == 0 && i > 0 && i < gridXSize - 1 ) + if( j == 0 && i < gridXSize ) { aux[ i ] = 0.0; //u[ j * gridXSize + 1 ]; u[ i ] = 0.0; //u[ j * gridXSize + 1 ]; } - if( j == gridYSize -1 && i > 0 && i < gridXSize - 1 ) + if( j == gridYSize -1 && i < gridXSize ) { aux[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; u[ j * gridXSize + i ] = 0.0; //u[ j * gridXSize + gridXSize - 1 ]; @@ -80,11 +80,11 @@ __global__ void heatEquationKernel( const Real* u, j > 0 && j < gridYSize - 1 ) { const Index c = j * gridXSize + i; - aux[ c ] = tau * ( ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + - ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] * hy_inv ) ); - //aux[ c ] = tau * ( ( __ldg( &u[ c - 1 ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + 1 ] ) ) * hx_inv + - // ( __ldg( &u[ c - gridXSize ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + gridXSize ] ) ) * hy_inv ); - } + aux[ c ] = ( ( u[ c - 1 ] - 2.0 * u[ c ] + u[ c + 1 ] ) * hx_inv + + ( u[ c - gridXSize ] - 2.0 * u[ c ] + u[ c + gridXSize ] ) * hy_inv ); + //aux[ c ] = ( ( __ldg( &u[ c - 1 ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + 1 ] ) ) * hx_inv + + // ( __ldg( &u[ c - gridXSize ] ) - 2.0 * __ldg( &u[ c ] ) + __ldg( &u[ c + gridXSize ] ) ) * hy_inv ); + } } template< typename RealType > diff --git a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h b/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h index 6834ed731da5afeec97f485b4723d176fe741527..f040068f3a9da2c817002ed2572eaffa0560e34b 100644 --- a/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h +++ b/tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation.h @@ -190,13 +190,14 @@ template< typename Real, typename Index > __global__ void updateKernel( Real* u, Real* aux, Real* cudaBlockResidue, - const Index dofs ) + const Index dofs, + Real tau ) { const Index blockOffset = blockIdx.x * blockDim.x; Index idx = blockOffset + threadIdx.x; if( idx < dofs ) - u[ idx ] += aux[ idx ]; + u[ idx ] += tau * aux[ idx ]; __syncthreads(); @@ -346,29 +347,31 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters, const Real timeLeft = finalTime - time; const Real currentTau = tau < timeLeft ? tau : timeLeft; - if( ! pureCRhsCuda( cudaGridSize, cudaBlockSize, cuda_u, cuda_aux, tau, hx_inv, hy_inv, gridXSize, gridYSize) ) + if( ! pureCRhsCuda( cudaGridSize, cudaBlockSize, cuda_u, cuda_aux, currentTau, hx_inv, hy_inv, gridXSize, gridYSize) ) return false; computationTimer.stop(); - /*cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); - writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); - getchar();*/ - + /*if( iteration % 100 == 0 ) + { + cudaMemcpy( aux, cuda_aux, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); + writeFunction( "rhs", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); + + cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); + writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); + getchar(); + }*/ + updateTimer.start(); /**** * Update */ //cout << "Update ... " << std::endl; - updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount ); + updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount, tau ); if( cudaGetLastError() != cudaSuccess ) { std::cerr << "Update failed." << std::endl; return false; - } - /*cudaMemcpy( aux, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); - writeFunction( "u", aux, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); - getchar();*/ - + } cudaThreadSynchronize(); cudaMemcpy( max_du, cuda_max_du, cudaUpdateBlocks.x * sizeof( Real ), cudaMemcpyDeviceToHost ); @@ -391,12 +394,18 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters, cout << "Iteration: " << iteration << "\t Time:" << time << " \r" << flush; } timer.stop(); + if( verbose ) + cout << endl; + //cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); //writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); /**** * Saving the result */ + if( verbose ) + std::cout << "Saving result..." << std::endl; + meshFunction.save( "simple-heat-equation-result.tnl" ); /*** @@ -535,6 +544,9 @@ bool solveHeatEquationHost( const Config::ParameterContainer& parameters, std::cout << "Iteration: " << iteration << "\t \t Time:" << time << " \r" << std::flush; } timer.stop(); + if( verbose ) + cout << endl; + /**** * Saving the result