Loading CMakeLists.txt +6 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,12 @@ else() AddCompilerFlag( "-O3 -march=native -DNDEBUG -g" ) endif() get_filename_component( CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME ) if( CXX_COMPILER_NAME MATCHES "icpc" ) message( "Intel compiler detected..." ) AddCompilerFlag( "-DHAVE_ICPC" ) endif() ##### # Check for CUDA # Loading src/core/tnlStaticFor.h +20 −0 Original line number Diff line number Diff line Loading @@ -129,20 +129,28 @@ class tnlStaticFor __cuda_callable__ static void exec() { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec(); #else tnlAssert( false, ); #endif } template< typename T > __cuda_callable__ static void exec( T &p ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -150,10 +158,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1 ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -162,10 +174,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1, T2& p2 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1, p2 ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -175,10 +191,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1, T2& p2, T3& p3 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1, p2, p3 ); #else tnlAssert( false, ); #endif } }; Loading src/core/vectors/tnlVectorOperationsCuda_impl.h +4 −0 Original line number Diff line number Diff line Loading @@ -580,6 +580,7 @@ void tnlVectorOperations< tnlCuda >::computePrefixSum( Vector& v, typename Vector::IndexType begin, typename Vector::IndexType end ) { #ifdef HAVE_CUDA typedef tnlParallelReductionSum< typename Vector::RealType, typename Vector::IndexType > OperationType; Loading @@ -592,6 +593,9 @@ void tnlVectorOperations< tnlCuda >::computePrefixSum( Vector& v, &v.getData()[ begin ], operation, inclusivePrefixSum ); #else tnlCudaSupportMissingMessage;; #endif } template< typename Vector > Loading tools/src/tnl-mesh-convert.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ * * ***************************************************************************/ #ifndef HAVE_NOT_CXX11 #ifndef HAVE_ICPC #include "tnl-mesh-convert.h" #endif #include "tnlConfig.h" Loading Loading @@ -43,7 +43,7 @@ int main( int argc, char* argv[] ) conf_desc.printUsage( argv[ 0 ] ); return EXIT_FAILURE; } #ifndef HAVE_NOT_CXX11 #ifndef HAVE_ICPC if( ! convertMesh( parameters ) ) return EXIT_FAILURE; #endif Loading Loading
CMakeLists.txt +6 −0 Original line number Diff line number Diff line Loading @@ -45,6 +45,12 @@ else() AddCompilerFlag( "-O3 -march=native -DNDEBUG -g" ) endif() get_filename_component( CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER} NAME ) if( CXX_COMPILER_NAME MATCHES "icpc" ) message( "Intel compiler detected..." ) AddCompilerFlag( "-DHAVE_ICPC" ) endif() ##### # Check for CUDA # Loading
src/core/tnlStaticFor.h +20 −0 Original line number Diff line number Diff line Loading @@ -129,20 +129,28 @@ class tnlStaticFor __cuda_callable__ static void exec() { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec(); #else tnlAssert( false, ); #endif } template< typename T > __cuda_callable__ static void exec( T &p ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -150,10 +158,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1 ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -162,10 +174,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1, T2& p2 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1, p2 ); #else tnlAssert( false, ); #endif } template< typename T0, Loading @@ -175,10 +191,14 @@ class tnlStaticFor __cuda_callable__ static void exec( T0& p0, T1& p1, T2& p2, T3& p3 ) { #ifndef HAVE_ICPC StaticFor< IndexType, tnlStaticForIndexTag< IndexType, begin >, tnlStaticForIndexTag< IndexType, end - begin >, LoopBody >::exec( p0, p1, p2, p3 ); #else tnlAssert( false, ); #endif } }; Loading
src/core/vectors/tnlVectorOperationsCuda_impl.h +4 −0 Original line number Diff line number Diff line Loading @@ -580,6 +580,7 @@ void tnlVectorOperations< tnlCuda >::computePrefixSum( Vector& v, typename Vector::IndexType begin, typename Vector::IndexType end ) { #ifdef HAVE_CUDA typedef tnlParallelReductionSum< typename Vector::RealType, typename Vector::IndexType > OperationType; Loading @@ -592,6 +593,9 @@ void tnlVectorOperations< tnlCuda >::computePrefixSum( Vector& v, &v.getData()[ begin ], operation, inclusivePrefixSum ); #else tnlCudaSupportMissingMessage;; #endif } template< typename Vector > Loading
tools/src/tnl-mesh-convert.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -15,7 +15,7 @@ * * ***************************************************************************/ #ifndef HAVE_NOT_CXX11 #ifndef HAVE_ICPC #include "tnl-mesh-convert.h" #endif #include "tnlConfig.h" Loading Loading @@ -43,7 +43,7 @@ int main( int argc, char* argv[] ) conf_desc.printUsage( argv[ 0 ] ); return EXIT_FAILURE; } #ifndef HAVE_NOT_CXX11 #ifndef HAVE_ICPC if( ! convertMesh( parameters ) ) return EXIT_FAILURE; #endif Loading