Loading CMakeLists.txt +13 −4 Original line number Diff line number Diff line Loading @@ -28,9 +28,15 @@ else() AddCompilerFlag( "-O3 -DNDEBUG" ) endif() if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION" ) endif() if( WITH_CUDA STREQUAL "yes" ) AddCompilerFlag( "-DHAVE_NOT_CXX11" ) else() AddCompilerFlag( "-std=gnu++0x" ) #AddCompilerFlag( "-DHAVE_NOT_CXX11" ) endif() ##### Loading Loading @@ -288,7 +294,10 @@ if(WIN32 AND NOT UNIX) set(CPACK_NSIS_MODIFY_PATH ON) else(WIN32 AND NOT UNIX) # set(CPACK_STRIP_FILES "bin/MyExecutable") set(CPACK_SOURCE_STRIP_FILES "") endif(WIN32 AND NOT UNIX) set(CPACK_SOURCE_STRIP_FILES "Debug") set(CPACK_SOURCE_STRIP_FILES "Release") #set(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") INCLUDE( CPack ) buildAll +15 −4 Original line number Diff line number Diff line Loading @@ -2,14 +2,14 @@ TARGET=TNL INSTALL_PREFIX=${HOME}/local WITH_CUDA=no WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes VERBOSE=1 CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS=1 echo "Building $TARGET using $CPUS processors." Loading @@ -23,13 +23,24 @@ then fi cd Debug ${CMAKE} .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR} ${CMAKE} .. -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} #VERBOSE=1 make -j${CPUS} test make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR} ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} #VERBOSE=1 make -j${CPUS} test make -j${CPUS} install src/core/cuda/reduction-operations.h +17 −11 Original line number Diff line number Diff line Loading @@ -93,6 +93,12 @@ __device__ inline double tnlCudaAbs( const double& a ) { return fabs( a ); } template< typename Type1, typename Type2 > __device__ Type1 tnlCudaPow( const Type1& x, const Type2& power ) { return ( Type1 ) pow( ( double ) x, ( double ) power ); } #endif template< typename Real, typename Index > Loading Loading @@ -683,14 +689,14 @@ class tnlParallelReductionLpNorm const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] ), p ) + pow( tnlCudaAbs( data1[ idx2 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] ), p ) + tnlCudaPow( tnlCudaAbs( data1[ idx2 ] ), p ); } __device__ ResultType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -701,8 +707,8 @@ class tnlParallelReductionLpNorm const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] ), p ) + pow( tnlCudaAbs( data2[ idx3 ] ), p ); tnlCudaPow( tnlCudaAbs( data2[ idx2 ] ), p ) + tnlCudaPow( tnlCudaAbs( data2[ idx3 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -711,7 +717,7 @@ class tnlParallelReductionLpNorm const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] ), p ); return data1[ idx1 ] + tnlCudaPow( tnlCudaAbs( data2[ idx2 ] ), p ); }; __device__ ResultType commonReductionOnDevice( const IndexType idx1, Loading Loading @@ -1409,15 +1415,15 @@ class tnlParallelReductionDiffLpNorm const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ) + pow( tnlCudaAbs( data1[ idx2 ] - data2[ idx2 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ) + tnlCudaPow( tnlCudaAbs( data1[ idx2 ] - data2[ idx2 ] ), p ); } __device__ ResultType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -1428,8 +1434,8 @@ class tnlParallelReductionDiffLpNorm const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ) + pow( tnlCudaAbs( data2[ idx3 ] - data3[ idx3 ] ), p ); tnlCudaPow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ) + tnlCudaPow( tnlCudaAbs( data2[ idx3 ] - data3[ idx3 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -1438,7 +1444,7 @@ class tnlParallelReductionDiffLpNorm const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ); return data1[ idx1 ] + tnlCudaPow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ); }; __device__ ResultType commonReductionOnDevice( const IndexType idx1, Loading src/core/mfuncs.h +4 −4 Original line number Diff line number Diff line Loading @@ -21,19 +21,19 @@ #include <math.h> #include <stdlib.h> template< class T > T Min( const T& a, const T& b ) template< typename Type1, typename Type2 > Type1 Min( const Type1& a, const Type2& b ) { return a < b ? a : b; }; template< class T > T Max( const T& a, const T& b ) template< typename Type1, typename Type2 > Type1 Max( const Type1& a, const Type2& b ) { return a > b ? a : b; }; template< class T > void Swap( T& a, T& b ) template< typename Type > void Swap( Type& a, Type& b ) { T tmp( a ); Type tmp( a ); a = b; b = tmp; }; Loading src/implementation/solvers/ode/tnlMersonSolver_impl.h +8 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,14 @@ #include <core/tnlHost.h> #include <core/tnlCuda.h> /**** * In this code we do not use constants and references as we would like to. * OpenMP would complain that * * error: ‘some-variable’ is predetermined ‘shared’ for ‘firstprivate’ * */ #ifdef HAVE_CUDA template< typename Real, typename Index > Loading Loading
CMakeLists.txt +13 −4 Original line number Diff line number Diff line Loading @@ -28,9 +28,15 @@ else() AddCompilerFlag( "-O3 -DNDEBUG" ) endif() if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) AddCompilerFlag( "-DTEMPLATE_EXPLICIT_INSTANTIATION" ) endif() if( WITH_CUDA STREQUAL "yes" ) AddCompilerFlag( "-DHAVE_NOT_CXX11" ) else() AddCompilerFlag( "-std=gnu++0x" ) #AddCompilerFlag( "-DHAVE_NOT_CXX11" ) endif() ##### Loading Loading @@ -288,7 +294,10 @@ if(WIN32 AND NOT UNIX) set(CPACK_NSIS_MODIFY_PATH ON) else(WIN32 AND NOT UNIX) # set(CPACK_STRIP_FILES "bin/MyExecutable") set(CPACK_SOURCE_STRIP_FILES "") endif(WIN32 AND NOT UNIX) set(CPACK_SOURCE_STRIP_FILES "Debug") set(CPACK_SOURCE_STRIP_FILES "Release") #set(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") INCLUDE( CPack )
buildAll +15 −4 Original line number Diff line number Diff line Loading @@ -2,14 +2,14 @@ TARGET=TNL INSTALL_PREFIX=${HOME}/local WITH_CUDA=no WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes VERBOSE=1 CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS=1 echo "Building $TARGET using $CPUS processors." Loading @@ -23,13 +23,24 @@ then fi cd Debug ${CMAKE} .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR} ${CMAKE} .. -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} #VERBOSE=1 make -j${CPUS} test make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR} ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} #VERBOSE=1 make -j${CPUS} test make -j${CPUS} install
src/core/cuda/reduction-operations.h +17 −11 Original line number Diff line number Diff line Loading @@ -93,6 +93,12 @@ __device__ inline double tnlCudaAbs( const double& a ) { return fabs( a ); } template< typename Type1, typename Type2 > __device__ Type1 tnlCudaPow( const Type1& x, const Type2& power ) { return ( Type1 ) pow( ( double ) x, ( double ) power ); } #endif template< typename Real, typename Index > Loading Loading @@ -683,14 +689,14 @@ class tnlParallelReductionLpNorm const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] ), p ) + pow( tnlCudaAbs( data1[ idx2 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] ), p ) + tnlCudaPow( tnlCudaAbs( data1[ idx2 ] ), p ); } __device__ ResultType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -701,8 +707,8 @@ class tnlParallelReductionLpNorm const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] ), p ) + pow( tnlCudaAbs( data2[ idx3 ] ), p ); tnlCudaPow( tnlCudaAbs( data2[ idx2 ] ), p ) + tnlCudaPow( tnlCudaAbs( data2[ idx3 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -711,7 +717,7 @@ class tnlParallelReductionLpNorm const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] ), p ); return data1[ idx1 ] + tnlCudaPow( tnlCudaAbs( data2[ idx2 ] ), p ); }; __device__ ResultType commonReductionOnDevice( const IndexType idx1, Loading Loading @@ -1409,15 +1415,15 @@ class tnlParallelReductionDiffLpNorm const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ) + pow( tnlCudaAbs( data1[ idx2 ] - data2[ idx2 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ) + tnlCudaPow( tnlCudaAbs( data1[ idx2 ] - data2[ idx2 ] ), p ); } __device__ ResultType initialValueOnDevice( const IndexType idx1, const RealType* data1, const RealType* data2 ) const { return pow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ); return tnlCudaPow( tnlCudaAbs( data1[ idx1 ] - data2[ idx1 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -1428,8 +1434,8 @@ class tnlParallelReductionDiffLpNorm const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ) + pow( tnlCudaAbs( data2[ idx3 ] - data3[ idx3 ] ), p ); tnlCudaPow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ) + tnlCudaPow( tnlCudaAbs( data2[ idx3 ] - data3[ idx3 ] ), p ); }; __device__ ResultType firstReductionOnDevice( const IndexType idx1, Loading @@ -1438,7 +1444,7 @@ class tnlParallelReductionDiffLpNorm const RealType* data2, const RealType* data3 ) const { return data1[ idx1 ] + pow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ); return data1[ idx1 ] + tnlCudaPow( tnlCudaAbs( data2[ idx2 ] - data3[ idx2 ] ), p ); }; __device__ ResultType commonReductionOnDevice( const IndexType idx1, Loading
src/core/mfuncs.h +4 −4 Original line number Diff line number Diff line Loading @@ -21,19 +21,19 @@ #include <math.h> #include <stdlib.h> template< class T > T Min( const T& a, const T& b ) template< typename Type1, typename Type2 > Type1 Min( const Type1& a, const Type2& b ) { return a < b ? a : b; }; template< class T > T Max( const T& a, const T& b ) template< typename Type1, typename Type2 > Type1 Max( const Type1& a, const Type2& b ) { return a > b ? a : b; }; template< class T > void Swap( T& a, T& b ) template< typename Type > void Swap( Type& a, Type& b ) { T tmp( a ); Type tmp( a ); a = b; b = tmp; }; Loading
src/implementation/solvers/ode/tnlMersonSolver_impl.h +8 −0 Original line number Diff line number Diff line Loading @@ -21,6 +21,14 @@ #include <core/tnlHost.h> #include <core/tnlCuda.h> /**** * In this code we do not use constants and references as we would like to. * OpenMP would complain that * * error: ‘some-variable’ is predetermined ‘shared’ for ‘firstprivate’ * */ #ifdef HAVE_CUDA template< typename Real, typename Index > Loading