Merge branch 'develop' into euler (028750cf) · Commits · TNL / tnl-dev

CMakeLists.txt

+10 −6

Original line number	Diff line number	Diff line
		@@ -12,7 +12,7 @@
		# Vladimir Klement
		# Jakub Klinkovsky

		cmake_minimum_required( VERSION 3.0 )
		cmake_minimum_required( VERSION 3.4 )

		project( tnl )

		@@ -33,7 +33,6 @@ if( CMAKE_BUILD_TYPE STREQUAL "Debug")
		set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/lib )
		set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Debug/bin )
		set( debugExt -dbg )
		set( CMAKE_CXX_FLAGS "${CXXFLAGS} -g ")
		else()
		set( PROJECT_BUILD_PATH ${PROJECT_SOURCE_DIR}/Release/src )
		set( PROJECT_TESTS_PATH ${PROJECT_SOURCE_DIR}/Release/tests )
		@@ -41,10 +40,12 @@ else()
		set( LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/lib)
		set( EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/Release/bin)
		endif()

		# set Debug/Release options
		set( CMAKE_CXX_FLAGS "-std=c++11" )
		set( CMAKE_CXX_FLAGS_DEBUG "-g" )
		set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -g" )
		set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG" )
		#set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -DNDEBUG -ftree-vectorizer-verbose=1 -ftree-vectorize -fopt-info-vec-missed -funroll-loops" )
		# pass -rdynamic only in Debug mode
		set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "" )
		set( CMAKE_SHARED_LIBRARY_LINK_C_FLAGS_DEBUG "-rdynamic" )
		@@ -110,6 +111,9 @@ if( WITH_CUDA STREQUAL "yes" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; ${CUDA_ARCH} )
		# TODO: this is necessary only due to a bug in cmake
		set( CUDA_ADD_LIBRARY_OPTIONS -shared )
		# TODO: workaround for a bug in cmake 3.5.0 (fixed in 3.5.1)
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -ccbin "${CMAKE_CXX_COMPILER}" )
		set( CUDA_HOST_COMPILER "" )

		####
		# Check for cuBLAS

TODO

+5 −5

Original line number	Diff line number	Diff line
		@@ -10,6 +10,11 @@ TODO:
		- data by se na hostu preskupila do souvisleho bloku dat a ten se prenesl najednou


		TODO:
		- zrejme bude potreba udrzovat ke kazdemu objektu jeho obraz na GPU/MIC
		- to by zarizovala metoda syncToDevice() napr. kazdy objekt by mel promennou modified, ktera by rikala, jestli se zmenil a zda je nutne ho
		prekopirovavat

		TODO:
		- zavest namespaces

		@@ -21,11 +26,6 @@ TODO: CUDA unified memory
		se s nimi pracovat postaru
		- bylo by dobre to obalit unique pointery, aby se nemusela delat dealokace rucne

		TODO: shared pointery
		- mohli bychom pomoci nich odstranit Shared objekty
		- asi by bylo lepsi dat counter z shared pointeru primo do array a tento counter by se alokoval az po prvnim sdileni dat
		- diky tomu by se array mohlo vytvaret i na gpu bez nutnosti dynamicke alokace, jen by nebylo mozne delat bind (nebo nejaky zjednoduseny)

		TODO: Mesh
		* vsechny traits zkusit presunout do jednotneho MeshTraits, tj. temer MeshConfigTraits ale pojmenovat jako MeshTraits
		* omezit tnlDimensionsTag - asi to ale nepujde

src/core/CMakeLists.txt

+3 −3

Original line number	Diff line number	Diff line
		@@ -22,13 +22,13 @@ set (headers tnlAssert.h
		tnlList.h
		tnlList_impl.h
		tnlLogger.h
		tnlOmp.h
		tnlObject.h
		tnlStack.h
		tnlStaticFor.h
		tnlStatistics.h
		tnlString.h
		tnlReal.h
		tnlTimer.h
		tnlTimerCPU.h
		tnlTimerRT.h
		mfilename.h
		@@ -48,12 +48,12 @@ set( common_SOURCES
		${CURRENT_DIR}/tnlObject.cpp
		${CURRENT_DIR}/tnlStatistics.cpp
		${CURRENT_DIR}/tnlString.cpp
		${CURRENT_DIR}/tnlTimer.cpp
		${CURRENT_DIR}/tnlTimerCPU.cpp
		${CURRENT_DIR}/mfilename.cpp
		${CURRENT_DIR}/mpi-supp.cpp
		${CURRENT_DIR}/tnlCuda.cpp
		${CURRENT_DIR}/tnlHost.cpp
		${CURRENT_DIR}/tnlOmp.cpp )
		${CURRENT_DIR}/tnlHost.cpp )

		IF( BUILD_CUDA )
		set( tnl_core_CUDA__SOURCES

src/core/terminal-colors.h

+7 −6

Original line number	Diff line number	Diff line
		@@ -18,12 +18,13 @@
		#ifndef TERMINAL_COLORS_H
		#define TERMINAL_COLORS_H

		const std::string red( "\033[0;31m" );
		const std::string green( "\033[1;32m" );
		const std::string yellow( "\033[1;33m" );
		const std::string cyan( "\033[0;36m" );
		const std::string magenta( "\033[0;35m" );
		const std::string reset( "\033[0m" );
		const tnlString red( "\033[0;31m" );
		const tnlString green( "\033[1;32m" );
		const tnlString yellow( "\033[1;33m" );
		const tnlString cyan( "\033[0;36m" );
		const tnlString magenta( "\033[0;35m" );
		const tnlString bold();
		const tnlString reset( "\033[0m" );


		#endif /* TERMINAL_COLORS_H */

src/core/tnlCuda.cu

+13 −10

Original line number	Diff line number	Diff line
		@@ -16,27 +16,30 @@
		***************************************************************************/

		#include <core/tnlCuda.h>
		#include <config/tnlConfigDescription.h>
		#include <config/tnlParameterContainer.h>

		void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix )

		/*void tnlCuda::configSetup( tnlConfigDescription& config, const tnlString& prefix )
		{
		#ifdef HAVE_CUDA
		//config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP.", true );
		//config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads.", omp_get_max_threads() );
		config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device.", 0 );
		#else
		//config.addEntry< bool >( prefix + "omp-enabled", "Enable support of OpenMP (not supported on this system).", false );
		//config.addEntry< int >( prefix + "omp-max-threads", "Set maximum number of OpenMP threads (not supported on this system).", 0 );
		config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device (CUDA is not supported on this system).", 0 );
		#endif

		}

		bool tnlCuda::setup( const tnlParameterContainer& parameters,
		const tnlString& prefix )
		{
		//enable = parameters.getParameter< bool >( prefix + "omp-enabled" );
		//maxThreadsCount = parameters.getParameter< int >( prefix + "omp-max-threads" );
		int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
		#ifdef HAVE_CUDA
		cudaSetDevice( cudaDevice );
		checkCudaDevice;
		#endif
		return true;
		}

		*/

		bool tnlCuda::checkDevice( const char* file_name, int line )
		{