Loading examples/heat-equation/HeatEquationBuildConfigTag.h 0 → 100644 +60 −0 Original line number Diff line number Diff line /*************************************************************************** HeatEquationBuildConfigTag.h - description ------------------- begin : Jul 7, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Solvers/BuildConfigTags.h> namespace TNL { namespace Solvers { class HeatEquationBuildConfig { public: static void print() { std::cerr << "HeatEquationBuildConfig" << std::endl; } }; /**** * Turn off support for float and long double. */ template<> struct ConfigTagReal< HeatEquationBuildConfig, float > { enum { enabled = false }; }; template<> struct ConfigTagReal< HeatEquationBuildConfig, long double > { enum { enabled = false }; }; /**** * Turn off support for short int and long int indexing. */ template<> struct ConfigTagIndex< HeatEquationBuildConfig, short int >{ enum { enabled = false }; }; template<> struct ConfigTagIndex< HeatEquationBuildConfig, long int >{ enum { enabled = false }; }; /**** * Use of Grid is enabled for allowed dimensions and Real, Device and Index types. */ template< int Dimensions, typename Real, typename Device, typename Index > struct ConfigTagMesh< HeatEquationBuildConfig, Meshes::Grid< Dimensions, Real, Device, Index > > { enum { enabled = ConfigTagDimensions< HeatEquationBuildConfig, Dimensions >::enabled && ConfigTagReal< HeatEquationBuildConfig, Real >::enabled && ConfigTagDevice< HeatEquationBuildConfig, Device >::enabled && ConfigTagIndex< HeatEquationBuildConfig, Index >::enabled }; }; /**** * Please, chose your preferred time discretization here. */ template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, SemiImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; /**** * Only the Runge-Kutta-Merson solver is enabled by default. */ template<> struct ConfigTagExplicitSolver< HeatEquationBuildConfig, ExplicitEulerSolverTag >{ enum { enabled = false }; }; } // namespace Solvers } // namespace TNL examples/heat-equation/tnl-heat-equation.h +3 −1 Original line number Diff line number Diff line Loading @@ -21,12 +21,14 @@ #include <TNL/Functions/MeshFunction.h> #include <TNL/Problems/HeatEquationProblem.h> #include <TNL/Meshes/Grid.h> #include "HeatEquationBuildConfigTag.h" using namespace TNL; using namespace TNL::Problems; //typedef tnlDefaultBuildMeshConfig BuildConfig; typedef Solvers::FastBuildConfig BuildConfig; //typedef Solvers::FastBuildConfig BuildConfig; typedef Solvers::HeatEquationBuildConfig BuildConfig; template< typename MeshConfig > class heatEquationConfig Loading src/TNL/Devices/Cuda.cu +7 −17 Original line number Diff line number Diff line Loading @@ -23,18 +23,9 @@ void Cuda::setupThreads( const dim3& blockSize, long long int yThreads, long long int zThreads ) { if( blockSize.x ) blocksCount.x = xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ); else blocksCount.x = 0; if( blockSize.y ) blocksCount.y = yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ); else blocksCount.y = 0; if( blockSize.z ) blocksCount.z = xThreads / blockSize.z + ( zThreads % blockSize.z != 0 ); else blocksCount.z = 0; blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) ); blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) ); blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) ); /**** * TODO: Fix the following: Loading Loading @@ -80,21 +71,20 @@ void Cuda::setupGrid( const dim3& blocksCount, else gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/ if( gridIdx.x < gridsCount.x ) if( gridIdx.x < gridsCount.x - 1 ) gridSize.x = getMaxGridSize(); else gridSize.x = blocksCount.x % getMaxGridSize(); if( gridIdx.y < gridsCount.y ) if( gridIdx.y < gridsCount.y - 1 ) gridSize.y = getMaxGridSize(); else gridSize.y = blocksCount.y % getMaxGridSize(); if( gridIdx.z < gridsCount.z ) if( gridIdx.z < gridsCount.z - 1 ) gridSize.z = getMaxGridSize(); else gridSize.z = blocksCount.z % getMaxGridSize(); } void Cuda::printThreadsSetup( const dim3& blockSize, Loading src/TNL/Meshes/GridDetails/GridTraverser_impl.h +4 −1 Original line number Diff line number Diff line Loading @@ -451,6 +451,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ ) { Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading @@ -475,6 +476,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ ) { Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongY, cudaGridSize, cudaGridsCountAlongY ); GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading @@ -494,6 +496,7 @@ processEntities( gridIdx, gridEntityParameters... ); } //getchar(); cudaStreamSynchronize( s1 ); cudaStreamSynchronize( s2 ); cudaStreamSynchronize( s3 ); Loading Loading @@ -522,7 +525,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading Loading
examples/heat-equation/HeatEquationBuildConfigTag.h 0 → 100644 +60 −0 Original line number Diff line number Diff line /*************************************************************************** HeatEquationBuildConfigTag.h - description ------------------- begin : Jul 7, 2014 copyright : (C) 2014 by Tomas Oberhuber email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #pragma once #include <TNL/Solvers/BuildConfigTags.h> namespace TNL { namespace Solvers { class HeatEquationBuildConfig { public: static void print() { std::cerr << "HeatEquationBuildConfig" << std::endl; } }; /**** * Turn off support for float and long double. */ template<> struct ConfigTagReal< HeatEquationBuildConfig, float > { enum { enabled = false }; }; template<> struct ConfigTagReal< HeatEquationBuildConfig, long double > { enum { enabled = false }; }; /**** * Turn off support for short int and long int indexing. */ template<> struct ConfigTagIndex< HeatEquationBuildConfig, short int >{ enum { enabled = false }; }; template<> struct ConfigTagIndex< HeatEquationBuildConfig, long int >{ enum { enabled = false }; }; /**** * Use of Grid is enabled for allowed dimensions and Real, Device and Index types. */ template< int Dimensions, typename Real, typename Device, typename Index > struct ConfigTagMesh< HeatEquationBuildConfig, Meshes::Grid< Dimensions, Real, Device, Index > > { enum { enabled = ConfigTagDimensions< HeatEquationBuildConfig, Dimensions >::enabled && ConfigTagReal< HeatEquationBuildConfig, Real >::enabled && ConfigTagDevice< HeatEquationBuildConfig, Device >::enabled && ConfigTagIndex< HeatEquationBuildConfig, Index >::enabled }; }; /**** * Please, chose your preferred time discretization here. */ template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, SemiImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; /**** * Only the Runge-Kutta-Merson solver is enabled by default. */ template<> struct ConfigTagExplicitSolver< HeatEquationBuildConfig, ExplicitEulerSolverTag >{ enum { enabled = false }; }; } // namespace Solvers } // namespace TNL
examples/heat-equation/tnl-heat-equation.h +3 −1 Original line number Diff line number Diff line Loading @@ -21,12 +21,14 @@ #include <TNL/Functions/MeshFunction.h> #include <TNL/Problems/HeatEquationProblem.h> #include <TNL/Meshes/Grid.h> #include "HeatEquationBuildConfigTag.h" using namespace TNL; using namespace TNL::Problems; //typedef tnlDefaultBuildMeshConfig BuildConfig; typedef Solvers::FastBuildConfig BuildConfig; //typedef Solvers::FastBuildConfig BuildConfig; typedef Solvers::HeatEquationBuildConfig BuildConfig; template< typename MeshConfig > class heatEquationConfig Loading
src/TNL/Devices/Cuda.cu +7 −17 Original line number Diff line number Diff line Loading @@ -23,18 +23,9 @@ void Cuda::setupThreads( const dim3& blockSize, long long int yThreads, long long int zThreads ) { if( blockSize.x ) blocksCount.x = xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ); else blocksCount.x = 0; if( blockSize.y ) blocksCount.y = yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ); else blocksCount.y = 0; if( blockSize.z ) blocksCount.z = xThreads / blockSize.z + ( zThreads % blockSize.z != 0 ); else blocksCount.z = 0; blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) ); blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) ); blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) ); /**** * TODO: Fix the following: Loading Loading @@ -80,21 +71,20 @@ void Cuda::setupGrid( const dim3& blocksCount, else gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/ if( gridIdx.x < gridsCount.x ) if( gridIdx.x < gridsCount.x - 1 ) gridSize.x = getMaxGridSize(); else gridSize.x = blocksCount.x % getMaxGridSize(); if( gridIdx.y < gridsCount.y ) if( gridIdx.y < gridsCount.y - 1 ) gridSize.y = getMaxGridSize(); else gridSize.y = blocksCount.y % getMaxGridSize(); if( gridIdx.z < gridsCount.z ) if( gridIdx.z < gridsCount.z - 1 ) gridSize.z = getMaxGridSize(); else gridSize.z = blocksCount.z % getMaxGridSize(); } void Cuda::printThreadsSetup( const dim3& blockSize, Loading
src/TNL/Meshes/GridDetails/GridTraverser_impl.h +4 −1 Original line number Diff line number Diff line Loading @@ -451,6 +451,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ ) { Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading @@ -475,6 +476,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ ) { Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongY, cudaGridSize, cudaGridsCountAlongY ); GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading @@ -494,6 +496,7 @@ processEntities( gridIdx, gridEntityParameters... ); } //getchar(); cudaStreamSynchronize( s1 ); cudaStreamSynchronize( s2 ); cudaStreamSynchronize( s3 ); Loading Loading @@ -522,7 +525,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), Loading