Loading CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -108,7 +108,7 @@ if( WITH_CUDA STREQUAL "yes" ) set( CUDA_ARCH -gencode arch=compute_${WITH_CUDA_ARCH},code=sm_${WITH_CUDA_ARCH} ) endif() endif() set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} -D_FORCE_INLINES ) # TODO: this is necessary only due to a bug in cmake set( CUDA_ADD_LIBRARY_OPTIONS -shared ) # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1) Loading tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.h +26 −12 Original line number Diff line number Diff line Loading @@ -199,13 +199,15 @@ template< typename Real, typename Index > __global__ void updateKernel( Real* u, Real* aux, Real* cudaBlockResidue, const Index dofs ) const Index dofs, tnlGrid< 2, Real, tnlCuda, Index >* grid ) { typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; GridType grid; typename GridType::TestCell entity( grid ); // coordinates, entityOrientation, entityBasis ); typedef typename GridType::TestCell EntityType; typename GridType::CoordinatesType coordinates; typename EntityType::EntityOrientationType entityOrientation; typename EntityType::EntityBasisType entityBasis; typename GridType::TestCell entity( *grid, coordinates, entityOrientation, entityBasis ); const Index blockOffset = blockIdx.x * blockDim.x; Loading Loading @@ -344,6 +346,17 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, timer.start(); Real time( 0.0 ); Index iteration( 0 ); typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::VertexType VertexType; GridType grid; grid.setDimensions( gridXSize, gridYSize ); grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); GridType* cuda_grid; cudaMalloc( ( void** ) &cuda_grid, sizeof( GridType ) ); cudaMemcpy( cuda_grid, &grid, sizeof( GridType ), cudaMemcpyHostToDevice ); while( time < finalTime ) { computationTimer.start(); Loading @@ -363,7 +376,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, * Update */ //cout << "Update ... " << endl; updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount ); updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount, cuda_grid ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Update failed." << endl; Loading Loading @@ -397,15 +410,16 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, timer.stop(); cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); cudaFree( cuda_grid ); /**** * Saving the result */ typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::VertexType VertexType; GridType grid; grid.setDimensions( gridXSize, gridYSize ); grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); //typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; //typedef typename GridType::VertexType VertexType; //GridType grid; //grid.setDimensions( gridXSize, gridYSize ); //grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); tnlVector< Real, tnlCuda, Index > vecU; vecU.bind( cuda_u, gridXSize * gridYSize ); tnlMeshFunction< GridType > meshFunction; Loading Loading
CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -108,7 +108,7 @@ if( WITH_CUDA STREQUAL "yes" ) set( CUDA_ARCH -gencode arch=compute_${WITH_CUDA_ARCH},code=sm_${WITH_CUDA_ARCH} ) endif() endif() set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} -D_FORCE_INLINES ) # TODO: this is necessary only due to a bug in cmake set( CUDA_ADD_LIBRARY_OPTIONS -shared ) # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1) Loading
tests/benchmarks/heat-equation-benchmark/tnl-benchmark-simple-heat-equation-bug.h +26 −12 Original line number Diff line number Diff line Loading @@ -199,13 +199,15 @@ template< typename Real, typename Index > __global__ void updateKernel( Real* u, Real* aux, Real* cudaBlockResidue, const Index dofs ) const Index dofs, tnlGrid< 2, Real, tnlCuda, Index >* grid ) { typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; GridType grid; typename GridType::TestCell entity( grid ); // coordinates, entityOrientation, entityBasis ); typedef typename GridType::TestCell EntityType; typename GridType::CoordinatesType coordinates; typename EntityType::EntityOrientationType entityOrientation; typename EntityType::EntityBasisType entityBasis; typename GridType::TestCell entity( *grid, coordinates, entityOrientation, entityBasis ); const Index blockOffset = blockIdx.x * blockDim.x; Loading Loading @@ -344,6 +346,17 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, timer.start(); Real time( 0.0 ); Index iteration( 0 ); typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::VertexType VertexType; GridType grid; grid.setDimensions( gridXSize, gridYSize ); grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); GridType* cuda_grid; cudaMalloc( ( void** ) &cuda_grid, sizeof( GridType ) ); cudaMemcpy( cuda_grid, &grid, sizeof( GridType ), cudaMemcpyHostToDevice ); while( time < finalTime ) { computationTimer.start(); Loading @@ -363,7 +376,7 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, * Update */ //cout << "Update ... " << endl; updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount ); updateKernel<<< cudaUpdateBlocks, cudaUpdateBlockSize >>>( cuda_u, cuda_aux, cuda_max_du, dofsCount, cuda_grid ); if( cudaGetLastError() != cudaSuccess ) { cerr << "Update failed." << endl; Loading Loading @@ -397,15 +410,16 @@ bool solveHeatEquationCuda( const tnlParameterContainer& parameters, timer.stop(); cudaMemcpy( u, cuda_u, dofsCount * sizeof( Real ), cudaMemcpyDeviceToHost ); writeFunction( "final", u, gridXSize, gridYSize, hx, hy, domainXSize / 2.0, domainYSize / 2.0 ); cudaFree( cuda_grid ); /**** * Saving the result */ typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; typedef typename GridType::VertexType VertexType; GridType grid; grid.setDimensions( gridXSize, gridYSize ); grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); //typedef tnlGrid< 2, Real, tnlCuda, Index > GridType; //typedef typename GridType::VertexType VertexType; //GridType grid; //grid.setDimensions( gridXSize, gridYSize ); //grid.setDomain( VertexType( 0.0, 0.0 ), VertexType( domainXSize, domainYSize ) ); tnlVector< Real, tnlCuda, Index > vecU; vecU.bind( cuda_u, gridXSize * gridYSize ); tnlMeshFunction< GridType > meshFunction; Loading