Loading CMakeLists.txt +10 −6 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ # Vladimir Klement # Jakub Klinkovsky cmake_minimum_required( VERSION 3.0 ) cmake_minimum_required( VERSION 3.4 ) project( tnl ) Loading @@ -33,7 +33,6 @@ if( CMAKE_BUILD_TYPE STREQUAL "Debug") set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/lib ) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/bin ) set( debugExt -dbg ) set( CMAKE_CXX_FLAGS "${CXXFLAGS} -g ") else() set( PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/Release/src ) set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/tests ) Loading @@ -41,10 +40,12 @@ else() set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/lib) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin) endif() # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) # pass -rdynamic only in Debug mode set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" ) set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" ) Loading Loading @@ -110,6 +111,9 @@ if( WITH_CUDA STREQUAL "yes" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} ) # TODO: this is necessary only due to a bug in cmake set( CUDA_ADD_LIBRARY_OPTIONS -shared ) # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -ccbin "${CMAKE_CXX_COMPILER}" ) set( CUDA_HOST_COMPILER "" ) #### # Check for cuBLAS Loading TODO +5 −5 Original line number Diff line number Diff line Loading @@ -10,6 +10,11 @@ TODO: - data by se na hostu preskupila do souvisleho bloku dat a ten se prenesl najednou TODO: - zrejme bude potreba udrzovat ke kazdemu objektu jeho obraz na GPU/MIC - to by zarizovala metoda syncToDevice() napr. kazdy objekt by mel promennou modified, ktera by rikala, jestli se zmenil a zda je nutne ho prekopirovavat TODO: - zavest namespaces Loading @@ -21,11 +26,6 @@ TODO: CUDA unified memory se s nimi pracovat postaru - bylo by dobre to obalit unique poinetry, aby se nemusela delat dealokace rucne TODO: shared pointery - mohli bysme pomoci nich odstranit Shared objekty - asi by bylo lepsi datcounter z shared pointeru primo do array a tento counter by se alokoval az po porvnim sdileni dat - diky tomu by se array mohlo vytvaret i na gpu bez nutnosti dynamicke alokace, jen by nebylo mozne delat bind (nebo nejaky zjednoduseny) TODO: Mesh * vsechny traits zkusit presunout do jednotneho MeshTraits, tj. temer MeshConfigTraits ale pojmenovat jako MeshTraits * omezit tnlDimesnionsTag - asi to ale nepujde Loading src/core/CMakeLists.txt +3 −3 Original line number Diff line number Diff line Loading @@ -22,13 +22,13 @@ set (headers tnlAssert.h tnlList.h tnlList_impl.h tnlLogger.h tnlOmp.h tnlObject.h tnlStack.h tnlStaticFor.h tnlStatistics.h tnlString.h tnlReal.h tnlTimer.h tnlTimerCPU.h tnlTimerRT.h mfilename.h Loading @@ -48,12 +48,12 @@ set( common_SOURCES ${CURRENT_DIR}/tnlObject.cpp ${CURRENT_DIR}/tnlStatistics.cpp ${CURRENT_DIR}/tnlString.cpp ${CURRENT_DIR}/tnlTimer.cpp ${CURRENT_DIR}/tnlTimerCPU.cpp ${CURRENT_DIR}/mfilename.cpp ${CURRENT_DIR}/mpi-supp.cpp ${CURRENT_DIR}/tnlCuda.cpp ${CURRENT_DIR}/tnlHost.cpp ${CURRENT_DIR}/tnlOmp.cpp ) ${CURRENT_DIR}/tnlHost.cpp ) IF( BUILD_CUDA ) set( tnl_core_CUDA__SOURCES Loading src/core/terminal-colors.h +7 −6 Original line number Diff line number Diff line Loading @@ -18,12 +18,13 @@ #ifndef TERMINAL_COLORS_H #define TERMINAL_COLORS_H const std::string red( "\033[0;31m" ); const std::string green( "\033[1;32m" ); const std::string yellow( "\033[1;33m" ); const std::string cyan( "\033[0;36m" ); const std::string magenta( "\033[0;35m" ); const std::string reset( "\033[0m" ); const tnlString red( "\033[0;31m" ); const tnlString green( "\033[1;32m" ); const tnlString yellow( "\033[1;33m" ); const tnlString cyan( "\033[0;36m" ); const tnlString magenta( "\033[0;35m" ); const tnlString bold(); const tnlString reset( "\033[0m" ); #endif /* TERMINAL_COLORS_H */ Loading src/core/tnlCuda.cu +13 −10 Original line number Diff line number Diff line Loading @@ -16,27 +16,30 @@ ***************************************************************************/ #include <core/tnlCuda.h> #include <config/tnlConfigDescription.h> #include <config/tnlParameterContainer.h> void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) /*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) { #ifdef HAVE_CUDA //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 ); #else //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 ); #endif } bool tnlCuda::setup( const tnlParameterContainer& parameters, const tnlString& prefix ) { //enable = parameters.getParameter< bool >( prefix + "omp-enabled" ); //maxThreadsCount = parameters.getParameter< int ( prefix + "omp-max-threads" ); int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); #ifdef HAVE_CUDA cudaSetDevice( cudaDevice ); checkCudaDevice; #endif return true; } */ bool tnlCuda::checkDevice( const char* file_name, int line ) { Loading Loading
CMakeLists.txt +10 −6 Original line number Diff line number Diff line Loading @@ -12,7 +12,7 @@ # Vladimir Klement # Jakub Klinkovsky cmake_minimum_required( VERSION 3.0 ) cmake_minimum_required( VERSION 3.4 ) project( tnl ) Loading @@ -33,7 +33,6 @@ if( CMAKE_BUILD_TYPE STREQUAL "Debug") set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/lib ) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/bin ) set( debugExt -dbg ) set( CMAKE_CXX_FLAGS "${CXXFLAGS} -g ") else() set( PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/Release/src ) set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/tests ) Loading @@ -41,10 +40,12 @@ else() set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/lib) set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin) endif() # set Debug/Release options set( CMAKE_CXX_FLAGS "-std=c++11" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG" ) #set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" ) # pass -rdynamic only in Debug mode set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" ) set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" ) Loading Loading @@ -110,6 +111,9 @@ if( WITH_CUDA STREQUAL "yes" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} ) # TODO: this is necessary only due to a bug in cmake set( CUDA_ADD_LIBRARY_OPTIONS -shared ) # TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -ccbin "${CMAKE_CXX_COMPILER}" ) set( CUDA_HOST_COMPILER "" ) #### # Check for cuBLAS Loading
TODO +5 −5 Original line number Diff line number Diff line Loading @@ -10,6 +10,11 @@ TODO: - data by se na hostu preskupila do souvisleho bloku dat a ten se prenesl najednou TODO: - zrejme bude potreba udrzovat ke kazdemu objektu jeho obraz na GPU/MIC - to by zarizovala metoda syncToDevice() napr. kazdy objekt by mel promennou modified, ktera by rikala, jestli se zmenil a zda je nutne ho prekopirovavat TODO: - zavest namespaces Loading @@ -21,11 +26,6 @@ TODO: CUDA unified memory se s nimi pracovat postaru - bylo by dobre to obalit unique poinetry, aby se nemusela delat dealokace rucne TODO: shared pointery - mohli bysme pomoci nich odstranit Shared objekty - asi by bylo lepsi datcounter z shared pointeru primo do array a tento counter by se alokoval az po porvnim sdileni dat - diky tomu by se array mohlo vytvaret i na gpu bez nutnosti dynamicke alokace, jen by nebylo mozne delat bind (nebo nejaky zjednoduseny) TODO: Mesh * vsechny traits zkusit presunout do jednotneho MeshTraits, tj. temer MeshConfigTraits ale pojmenovat jako MeshTraits * omezit tnlDimesnionsTag - asi to ale nepujde Loading
src/core/CMakeLists.txt +3 −3 Original line number Diff line number Diff line Loading @@ -22,13 +22,13 @@ set (headers tnlAssert.h tnlList.h tnlList_impl.h tnlLogger.h tnlOmp.h tnlObject.h tnlStack.h tnlStaticFor.h tnlStatistics.h tnlString.h tnlReal.h tnlTimer.h tnlTimerCPU.h tnlTimerRT.h mfilename.h Loading @@ -48,12 +48,12 @@ set( common_SOURCES ${CURRENT_DIR}/tnlObject.cpp ${CURRENT_DIR}/tnlStatistics.cpp ${CURRENT_DIR}/tnlString.cpp ${CURRENT_DIR}/tnlTimer.cpp ${CURRENT_DIR}/tnlTimerCPU.cpp ${CURRENT_DIR}/mfilename.cpp ${CURRENT_DIR}/mpi-supp.cpp ${CURRENT_DIR}/tnlCuda.cpp ${CURRENT_DIR}/tnlHost.cpp ${CURRENT_DIR}/tnlOmp.cpp ) ${CURRENT_DIR}/tnlHost.cpp ) IF( BUILD_CUDA ) set( tnl_core_CUDA__SOURCES Loading
src/core/terminal-colors.h +7 −6 Original line number Diff line number Diff line Loading @@ -18,12 +18,13 @@ #ifndef TERMINAL_COLORS_H #define TERMINAL_COLORS_H const std::string red( "\033[0;31m" ); const std::string green( "\033[1;32m" ); const std::string yellow( "\033[1;33m" ); const std::string cyan( "\033[0;36m" ); const std::string magenta( "\033[0;35m" ); const std::string reset( "\033[0m" ); const tnlString red( "\033[0;31m" ); const tnlString green( "\033[1;32m" ); const tnlString yellow( "\033[1;33m" ); const tnlString cyan( "\033[0;36m" ); const tnlString magenta( "\033[0;35m" ); const tnlString bold(); const tnlString reset( "\033[0m" ); #endif /* TERMINAL_COLORS_H */ Loading
src/core/tnlCuda.cu +13 −10 Original line number Diff line number Diff line Loading @@ -16,27 +16,30 @@ ***************************************************************************/ #include <core/tnlCuda.h> #include <config/tnlConfigDescription.h> #include <config/tnlParameterContainer.h> void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) /*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix ) { #ifdef HAVE_CUDA //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true ); //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() ); config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 ); #else //config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false ); //config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 ); config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 ); #endif } bool tnlCuda::setup( const tnlParameterContainer& parameters, const tnlString& prefix ) { //enable = parameters.getParameter< bool >( prefix + "omp-enabled" ); //maxThreadsCount = parameters.getParameter< int ( prefix + "omp-max-threads" ); int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); #ifdef HAVE_CUDA cudaSetDevice( cudaDevice ); checkCudaDevice; #endif return true; } */ bool tnlCuda::checkDevice( const char* file_name, int line ) { Loading