Minor tweaks and OpenMP (dbfc942e) · Commits · TNL / tnl-dev

CMakeLists.txt

+1 −1

Original line number	Diff line number	Diff line
		@@ -164,7 +164,7 @@ AddCompilerFlag( "-std=c++11" )
		# Enable OpenMP when the compiler supports it.
		# FindOpenMP reports the compiler-specific switch in OpenMP_CXX_FLAGS
		# (for example -fopenmp for GCC), so that variable is used instead of a
		# hard-coded -fopenmp, which only some compilers understand.
		# HAVE_OPENMP is defined for the sources whenever OpenMP is enabled.
		find_package( OpenMP )
		if( OPENMP_FOUND )
		   message( "Compiler supports OpenMP." )
		   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_OPENMP ${OpenMP_CXX_FLAGS}" )
		endif()

		####

build

0 → 100755

+123 −0

Original line number	Diff line number	Diff line
		#!/bin/bash
		#
		# Default settings of the TNL build script. Each of them can be overridden
		# by the matching command-line option parsed further below.
		# BUILD (build type) and PETSC_DIR are not initialised here: BUILD is set
		# only by --build=..., PETSC_DIR is never assigned by this script, so both
		# fall back to whatever the calling environment provides.

		TARGET=TNL
		PREFIX="${HOME}/local"
		WITH_CUDA="yes"
		WITH_TESTS="yes"

		WITH_CUDA_ARCH="auto"
		WITH_CUBLAS="no"
		WITH_TEMPLATE_INSTANTIATION="yes"
		INSTANTIATE_LONG_INT="no"
		INSTANTIATE_INT="yes"
		INSTANTIATE_LONG_DOUBLE="no"
		INSTANTIATE_DOUBLE="yes"
		INSTANTIATE_FLOAT="no"
		CMAKE="cmake"
		CMAKE_ONLY="no"
		HELP="no"
		VERBOSE=""
		ROOT_DIR="."
		DCMTK_DIR="/usr/include/dcmtk"

		# Number of parallel build jobs: count the "processor" lines of
		# /proc/cpuinfo first (the original behaviour). When that file is missing
		# or yields no usable number, ask getconf, and finally fall back to 1 so
		# that "make -j${BUILD_JOBS}" never receives an empty or zero job count.
		BUILD_JOBS=$(grep -c processor /proc/cpuinfo 2>/dev/null)
		case "${BUILD_JOBS}" in
		    ''|0|*[!0-9]* ) BUILD_JOBS=$(getconf _NPROCESSORS_ONLN 2>/dev/null) ;;
		esac
		case "${BUILD_JOBS}" in
		    ''|0|*[!0-9]* ) BUILD_JOBS=1 ;;
		esac

		# Parse the command-line options. Options have the form --name=value or
		# are bare flags; "${option#*=}" strips everything up to and including the
		# first '=' and leaves just the value. Any unrecognised option aborts the
		# script with exit status 1.
		for option in "$@"
		do
		    case "${option}" in
		        --prefix=* ) PREFIX="${option#*=}" ;;
		        --build=* ) BUILD="${option#*=}" ;;
		        --with-tests=* ) WITH_TESTS="${option#*=}" ;;
		        --with-cuda=* ) WITH_CUDA="${option#*=}" ;;
		        --with-cublas=* ) WITH_CUBLAS="${option#*=}" ;;
		        --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}" ;;
		        --with-templates-instantiation=* ) WITH_TEMPLATE_INSTANTIATION="${option#*=}" ;;
		        --instantiate-long-int=* ) INSTANTIATE_LONG_INT="${option#*=}" ;;
		        --instantiate-int=* ) INSTANTIATE_INT="${option#*=}" ;;
		        --instantiate-long-double=* ) INSTANTIATE_LONG_DOUBLE="${option#*=}" ;;
		        --instantiate-double=* ) INSTANTIATE_DOUBLE="${option#*=}" ;;
		        --instantiate-float=* ) INSTANTIATE_FLOAT="${option#*=}" ;;
		        --fast-build )
		            INSTANTIATE_LONG_INT="no"
		            INSTANTIATE_INT="yes"
		            INSTANTIATE_LONG_DOUBLE="no"
		            INSTANTIATE_DOUBLE="yes"
		            INSTANTIATE_FLOAT="no"
		            WITH_CUDA_ARCH="auto" ;;
		        # --full-build is advertised by --help; it switches on every
		        # template instantiation (the opposite of --fast-build).
		        --full-build )
		            INSTANTIATE_LONG_INT="yes"
		            INSTANTIATE_INT="yes"
		            INSTANTIATE_LONG_DOUBLE="yes"
		            INSTANTIATE_DOUBLE="yes"
		            INSTANTIATE_FLOAT="yes" ;;
		        --with-cmake=* ) CMAKE="${option#*=}" ;;
		        --build-jobs=* ) BUILD_JOBS="${option#*=}" ;;
		        --cmake-only=* ) CMAKE_ONLY="${option#*=}" ;;
		        --verbose ) VERBOSE="VERBOSE=1" ;;
		        --root-dir=* ) ROOT_DIR="${option#*=}" ;;
		        --dcmtk-dir=* ) DCMTK_DIR="${option#*=}" ;;
		        --help ) HELP="yes" ;;
		        * )
		            echo "Unknown option ${option}. Use --help for more information."
		            exit 1 ;;
		    esac
		done

		# Print the usage summary and stop. Asking for help is not a failure, so
		# the script exits with status 0 here.
		if test "${HELP}" = "yes"
		then
		    echo "TNL build options:"
		    echo ""
		    echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default."
		    echo " --build=Debug/Release Build type."
		    echo " --with-tests=yes/no Enable unit tests. 'yes' by default (libcppunit-dev is required)."
		    echo " --with-cuda=yes/no Enable CUDA. 'yes' by default (CUDA Toolkit is required)."
		    echo " --with-cublas=yes/no Enable cuBLAS. 'no' by default."
		    echo " --with-cuda-arch=all/auto/30/35/... Choose CUDA architecture."
		    echo " --with-templates-instantiation=yes/no Some TNL templates are precompiled during the build. 'yes' by default."
		    echo " --instantiate-int=yes/no Instantiate templates for int indexing. 'yes' by default."
		    echo " --instantiate-long-int=yes/no Instantiate templates for long int indexing. 'no' by default."
		    echo " --instantiate-float=yes/no Instantiate templates for float arithmetics. 'no' by default."
		    echo " --instantiate-double=yes/no Instantiate templates for double arithmetics. 'yes' by default."
		    echo " --instantiate-long-double=yes/no Instantiate templates for long double arithmetics. 'no' by default."
		    echo " --fast-build Instantiate only int indexing and double arithmetics, set the CUDA architecture to 'auto'."
		    echo " --full-build Instantiate all -- long int indexing, float and long double floating point arithmetics."
		    echo " --with-cmake=CMAKE Path to cmake. 'cmake' by default."
		    echo " --cmake-only=yes/no Run only the cmake configuration and skip the build. 'no' by default."
		    echo " --build-jobs=NUM Number of processes to be used for the build. It is set to a number of CPU cores by default."
		    echo " --verbose It enables verbose build."
		    echo " --root-dir=PATH Path to the TNL source code root dir."
		    echo " --dcmtk-dir=PATH Path to the DCMTK (Dicom Toolkit) root dir."
		    echo " --help Write this help."
		    exit 0
		fi

		echo "Configuring ${BUILD} $TARGET ..."

		# Run the cmake configuration. Every value is quoted so that paths with
		# spaces (install prefix, source root, DCMTK dir) reach cmake as a single
		# argument. ${CMAKE} itself stays unquoted so that a value consisting of a
		# command plus arguments still splits the way it did before.
		# PETSC_DIR is never assigned by this script; it is taken from the
		# environment and is passed as an empty value when unset.
		if ! ${CMAKE} "${ROOT_DIR}" \
		    -DCMAKE_BUILD_TYPE="${BUILD}" \
		    -DCMAKE_INSTALL_PREFIX="${PREFIX}" \
		    -DWITH_CUDA="${WITH_CUDA}" \
		    -DWITH_CUDA_ARCH="${WITH_CUDA_ARCH}" \
		    -DWITH_CUBLAS="${WITH_CUBLAS}" \
		    -DWITH_TESTS="${WITH_TESTS}" \
		    -DPETSC_DIR="${PETSC_DIR}" \
		    -DDCMTK_DIR="${DCMTK_DIR}" \
		    -DWITH_TEMPLATE_INSTANTIATION="${WITH_TEMPLATE_INSTANTIATION}" \
		    -DINSTANTIATE_FLOAT="${INSTANTIATE_FLOAT}" \
		    -DINSTANTIATE_DOUBLE="${INSTANTIATE_DOUBLE}" \
		    -DINSTANTIATE_LONG_DOUBLE="${INSTANTIATE_LONG_DOUBLE}" \
		    -DINSTANTIATE_INT="${INSTANTIATE_INT}" \
		    -DINSTANTIATE_LONG_INT="${INSTANTIATE_LONG_INT}"
		then
		    echo "Error: cmake exited with error code."
		    exit 1
		fi

		# Stop after the configuration step when --cmake-only=yes was given.
		# Reaching this point means cmake succeeded, so report success (status 0)
		# rather than a failure code.
		if test "${CMAKE_ONLY}" = "yes"
		then
		    exit 0
		fi

		echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."

		# Compile with the requested number of parallel jobs; abort the script
		# with status 1 when make reports a failure.
		# ${VERBOSE} is deliberately unquoted: when it is empty it must vanish
		# instead of being passed to make as an empty argument.
		make -j${BUILD_JOBS} ${VERBOSE} || {
		    echo "Error: Build process failed."
		    exit 1
		}


		# Run the test suite when tests are enabled. The variable has to be
		# expanded ("${WITH_TESTS}"); comparing the bare word WITH_TESTS with "yes"
		# is always false and would silently skip the tests.
		# A failing test run is only reported: the script still finishes with
		# exit status 0 below.
		if test "${WITH_TESTS}" = "yes"
		then
		    if ! make -j${BUILD_JOBS} test
		    then
		        echo "Error: Some test did not pass successfuly."
		    fi
		fi

		exit 0

examples/hamilton-jacobi-parallel-map/CMakeLists.txt

+1 −0

Original line number	Diff line number	Diff line
		@@ -3,6 +3,7 @@ set( tnl_hamilton_jacobi_parallel_map_SOURCES
		# tnlParallelMapSolver2D_impl.h
		# tnlParallelMapSolver.h
		# parallelMapConfig.h
		# main.cu
		main.cpp)

examples/hamilton-jacobi-parallel-map/main.h

+20 −21

Original line number	Diff line number	Diff line
		@@ -86,7 +86,6 @@ int main( int argc, char* argv[] )
		cout << "Starting solver loop..." << endl;
		solver.run();
		}
		// }
		}

examples/hamilton-jacobi-parallel-map/tnlParallelMapSolver.h

+1 −148

Original line number	Diff line number	Diff line
		@@ -129,10 +129,7 @@ public:
		int* calculationsCount_cuda;
		double* tmpw;
		double* tmp_map;
		//MeshTypeCUDA mesh_cuda, subMesh_cuda;
		//SchemeDevice scheme_cuda;
		//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
		//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;


		int* runcuda;
		int run_host;
		@@ -146,10 +143,6 @@ public:

		__device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID);

		/*__global__ void runCUDA();*/

		//__device__ void synchronizeCUDA();

		__device__ int getOwnerCUDA2D( int i) const;

		__device__ int getSubgridValueCUDA2D( int i ) const;
		@@ -160,137 +153,17 @@ public:

		__device__ void setBoundaryConditionCUDA2D( int i, int value );

		//__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

		/*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int > caller);*/

		#endif

		};







		/**
		 * Specialization of tnlParallelMapSolver for dimension 3 with double
		 * values and int indices.
		 *
		 * Declares the host-side interface (init / run / test and the subgrid
		 * helpers) and, when HAVE_CUDA is defined, a set of raw pointers with a
		 * _cuda suffix plus __device__ counterparts of the subgrid helpers.
		 * All members are public: the `private:` label below is commented out.
		 */
		template<typename SchemeHost, typename SchemeDevice, typename Device>
		class tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >
		{
		   public:

		   typedef SchemeDevice SchemeTypeDevice;
		   typedef SchemeHost SchemeTypeHost;
		   typedef Device DeviceType;
		   typedef tnlVector< double, tnlHost, int > VectorType;
		   typedef tnlVector< int, tnlHost, int > IntVectorType;
		   typedef tnlGrid< 3, double, tnlHost, int > MeshType;
		#ifdef HAVE_CUDA
		   // NOTE(review): the *CUDA typedefs use tnlHost exactly like the host
		   // typedefs above -- confirm that this is intended.
		   typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
		   typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
		   typedef tnlGrid< 3, double, tnlHost, int > MeshTypeCUDA;
		#endif
		   tnlParallelMapSolver();

		   // Sets the solver up from the given parameter container; returns a bool
		   // (presumably success/failure -- confirm against the implementation).
		   bool init( const tnlParameterContainer& parameters );
		   void run();

		   void test();

		   /*private:*/


		   void synchronize();

		   int getOwner( int i) const;

		   int getSubgridValue( int i ) const;

		   void setSubgridValue( int i, int value );

		   int getBoundaryCondition( int i ) const;

		   void setBoundaryCondition( int i, int value );

		   void stretchGrid();

		   void contractGrid();

		   VectorType getSubgrid( const int i ) const;

		   void insertSubgrid( VectorType u, const int i );

		   VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);


		   tnlMeshFunction<MeshType> u0;
		   VectorType work_u;
		   IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
		   MeshType mesh, subMesh;
		   SchemeHost schemeHost;
		   SchemeDevice schemeDevice;
		   double delta, tau0, stopTime,cflCondition;
		   int gridRows, gridCols, gridLevels, currentStep, n;

		   std::clock_t start;
		   double time_diff;


		   tnlDeviceEnum device;

		   // Returns a pointer to this solver instance.
		   tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
		   {
		      return this;
		   };

		#ifdef HAVE_CUDA

		   // Pointer to a solver of the same type (presumably the copy living in
		   // device memory -- TODO confirm).
		   tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;

		   // Raw-pointer counterparts of the host vectors declared above
		   // (presumably device buffers -- TODO confirm).
		   double* work_u_cuda;

		   int* subgridValues_cuda;
		   int* boundaryConditions_cuda;
		   int* unusedCell_cuda;
		   int* calculationsCount_cuda;
		   double* tmpw;
		   //MeshTypeCUDA mesh_cuda, subMesh_cuda;
		   //SchemeDevice scheme_cuda;
		   //double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
		   //int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;

		   int* runcuda;
		   int run_host;


		   __device__ void getSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);

		   __device__ void updateSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);

		   __device__ void insertSubgridCUDA3D( double u, const int i );

		   __device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID);

		   /*__global__ void runCUDA();*/

		   //__device__ void synchronizeCUDA();

		   __device__ int getOwnerCUDA3D( int i) const;

		   __device__ int getSubgridValueCUDA3D( int i ) const;

		   __device__ void setSubgridValueCUDA3D( int i, int value );

		   __device__ int getBoundaryConditionCUDA3D( int i ) const;

		   __device__ void setBoundaryConditionCUDA3D( int i, int value );

		   //__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

		   /*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int > caller);*/

		#endif

		};



		@@ -313,26 +186,6 @@ __global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevi
		// Declaration of the CUDA kernel synchronize2CUDA2D for the 2D
		// specialization of tnlParallelMapSolver; it receives a pointer to the
		// solver instance. The definition is not part of this view.
		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);







		// Declarations of the CUDA kernels for the 3D specialization of
		// tnlParallelMapSolver (definitions are not part of this view).
		// Every kernel receives a pointer to the solver instance.
		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void runCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);

		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void initRunCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);

		// Besides the solver pointer this kernel takes three raw pointers
		// (presumably buffers to be attached to the solver -- TODO confirm).
		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void initCUDA3D( tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3);

		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void synchronizeCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

		template <typename SchemeHost, typename SchemeDevice, typename Device>
		__global__ void synchronize2CUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
		#endif