Commit dbfc942e authored by Tomas Sobotik's avatar Tomas Sobotik
Browse files

Minor tweaks and OpenMP

parent 4d189588
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -164,7 +164,7 @@ AddCompilerFlag( "-std=c++11" )
# Detect OpenMP and, when the compiler supports it, enable it for all C++
# sources: HAVE_OPENMP is defined for the code and the compiler-specific
# switch reported by the FindOpenMP module (OpenMP_CXX_FLAGS, e.g. -fopenmp
# for GCC) is appended instead of a hard-coded -fopenmp, so that compilers
# using a different switch work as well.
find_package( OpenMP )
if( OPENMP_FOUND )
   message( "Compiler supports OpenMP." )
   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_OPENMP ${OpenMP_CXX_FLAGS}" )
endif()

####

build

0 → 100755
+123 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Configures TNL with cmake and builds it with make. The variables below are
# the defaults; each of them can be overridden by a command-line option.

# Name printed in the progress messages.
TARGET=TNL
# Installation prefix, passed to cmake as CMAKE_INSTALL_PREFIX.
PREFIX="${HOME}/local"
WITH_CUDA="yes"
WITH_TESTS="yes"

WITH_CUDA_ARCH="auto"
WITH_CUBLAS="no"
WITH_TEMPLATE_INSTANTIATION="yes"
INSTANTIATE_LONG_INT="no"
INSTANTIATE_INT="yes"
INSTANTIATE_LONG_DOUBLE="no"
INSTANTIATE_DOUBLE="yes"
INSTANTIATE_FLOAT="no"
# cmake executable to run.
CMAKE="cmake"
# When "yes", only the cmake configuration step is performed.
CMAKE_ONLY="no"
HELP="no"
# Extra argument for make; empty means a non-verbose build.
VERBOSE=""
# Source directory given to cmake.
ROOT_DIR="."
DCMTK_DIR="/usr/include/dcmtk"

# NOTE: BUILD (the cmake build type) is not initialised here; unless --build=
# is given it is taken from the environment or stays empty.

# Number of parallel make jobs: count the "processor" entries in
# /proc/cpuinfo. When that file is missing (non-Linux systems) or nothing
# matches, the count would be empty or 0, which would turn into an unbounded
# "make -j" or an invalid "make -j0" -- fall back to a single job instead.
BUILD_JOBS=$(grep -c processor /proc/cpuinfo 2>/dev/null)
case "${BUILD_JOBS}" in
    '' | 0 | *[!0-9]* ) BUILD_JOBS=1 ;;
esac

# Walk over all command-line arguments. Options carrying a value have the
# form --name=value; the text after the first '=' is extracted once per
# argument and stored into the matching configuration variable. Anything
# unrecognised aborts the script with exit status 1.
for option
do
    value="${option#*=}"
    case "${option}" in
        --prefix=* )
            PREFIX="${value}" ;;
        --build=* )
            BUILD="${value}" ;;
        --with-tests=* )
            WITH_TESTS="${value}" ;;
        --with-cuda=* )
            WITH_CUDA="${value}" ;;
        --with-cublas=* )
            WITH_CUBLAS="${value}" ;;
        --with-cuda-arch=* )
            WITH_CUDA_ARCH="${value}" ;;
        --with-templates-instantiation=* )
            WITH_TEMPLATE_INSTANTIATION="${value}" ;;
        --instantiate-long-int=* )
            INSTANTIATE_LONG_INT="${value}" ;;
        --instantiate-int=* )
            INSTANTIATE_INT="${value}" ;;
        --instantiate-long-double=* )
            INSTANTIATE_LONG_DOUBLE="${value}" ;;
        --instantiate-double=* )
            INSTANTIATE_DOUBLE="${value}" ;;
        --instantiate-float=* )
            INSTANTIATE_FLOAT="${value}" ;;
        --fast-build )
            # Preset: only int indexing and double arithmetics are instantiated.
            INSTANTIATE_LONG_INT="no";    INSTANTIATE_INT="yes"
            INSTANTIATE_LONG_DOUBLE="no"; INSTANTIATE_DOUBLE="yes"
            INSTANTIATE_FLOAT="no"
            WITH_CUDA_ARCH="auto" ;;
        --with-cmake=* )
            CMAKE="${value}" ;;
        --build-jobs=* )
            BUILD_JOBS="${value}" ;;
        --cmake-only=* )
            CMAKE_ONLY="${value}" ;;
        --verbose )
            VERBOSE="VERBOSE=1" ;;
        --root-dir=* )
            ROOT_DIR="${value}" ;;
        --dcmtk-dir=* )
            DCMTK_DIR="${value}" ;;
        --help )
            HELP="yes" ;;
        * )
            echo "Unknown option ${option}. Use --help for more information."
            exit 1 ;;
    esac
done

# Print the usage text and stop when --help was given. The list mirrors the
# options accepted by the option parser of this script (it used to advertise
# a non-existent --full-build and to omit several accepted options).
# Asking for help is not an error, so the script exits with status 0.
if test "${HELP}" = "yes";
then
    cat <<EOF
TNL build options:

   --prefix=PATH                         Prefix for the installation directory. ${HOME}/local by default.
   --build=Debug/Release                 Build type.
   --with-tests=yes/no                   Enable unit tests. 'yes' by default (libcppunit-dev is required).
   --with-cuda=yes/no                    Enable CUDA. 'yes' by default (CUDA Toolkit is required).
   --with-cublas=yes/no                  Enable CUBLAS. 'no' by default.
   --with-cuda-arch=all/auto/30/35/...   Choose CUDA architecture. 'auto' by default.
   --with-templates-instantiation=yes/no Some TNL templates are precompiled during the build. 'yes' by default.
   --instantiate-int=yes/no              Instantiate templates for int indexing. 'yes' by default.
   --instantiate-long-int=yes/no         Instantiate templates for long int indexing. 'no' by default.
   --instantiate-float=yes/no            Instantiate templates for float arithmetics. 'no' by default.
   --instantiate-double=yes/no           Instantiate templates for double arithmetics. 'yes' by default.
   --instantiate-long-double=yes/no      Instantiate templates for long double arithmetics. 'no' by default.
   --fast-build                          Instantiate only int indexing and double arithmetics, CUDA architecture 'auto'.
   --with-cmake=CMAKE                    Path to cmake. 'cmake' by default.
   --build-jobs=NUM                      Number of processes to be used for the build. It is set to a number of CPU cores by default.
   --cmake-only=yes/no                   Run only the cmake configuration, do not build. 'no' by default.
   --verbose                             It enables verbose build.
   --root-dir=PATH                       Path to the TNL source code root dir.
   --dcmtk-dir=PATH                      Path to the DCMTK (Dicom Toolkit) root dir.
   --help                                Write this help.
EOF
    exit 0
fi

echo "Configuring ${BUILD} $TARGET ..."

# Run cmake on the source tree in ${ROOT_DIR}, generating the build system in
# the current directory. Every expansion is quoted so that values containing
# spaces (installation prefix, source or DCMTK directory, path to cmake) are
# passed as a single argument. PETSC_DIR is not set by this script; it is
# taken from the environment and may be empty.
if ! "${CMAKE}" "${ROOT_DIR}" \
         -DCMAKE_BUILD_TYPE="${BUILD}" \
         -DCMAKE_INSTALL_PREFIX="${PREFIX}" \
         -DWITH_CUDA="${WITH_CUDA}" \
         -DWITH_CUDA_ARCH="${WITH_CUDA_ARCH}" \
         -DWITH_CUBLAS="${WITH_CUBLAS}" \
         -DWITH_TESTS="${WITH_TESTS}" \
         -DPETSC_DIR="${PETSC_DIR}" \
         -DDCMTK_DIR="${DCMTK_DIR}" \
         -DWITH_TEMPLATE_INSTANTIATION="${WITH_TEMPLATE_INSTANTIATION}" \
         -DINSTANTIATE_FLOAT="${INSTANTIATE_FLOAT}" \
         -DINSTANTIATE_DOUBLE="${INSTANTIATE_DOUBLE}" \
         -DINSTANTIATE_LONG_DOUBLE="${INSTANTIATE_LONG_DOUBLE}" \
         -DINSTANTIATE_INT="${INSTANTIATE_INT}" \
         -DINSTANTIATE_LONG_INT="${INSTANTIATE_LONG_INT}"
then
    echo "Error: cmake exited with error code."
    exit 1
fi

# With --cmake-only=yes the script stops after the configuration step.
# Reaching this point means cmake succeeded, so report success (status 0)
# rather than a failure to the caller.
if test "${CMAKE_ONLY}" = "yes";
then
    exit 0
fi

echo "Building ${BUILD} $TARGET using $BUILD_JOBS processors ..."

# Parallel build. ${VERBOSE} stays unquoted on purpose: when it is empty it
# must expand to no argument at all. Any make failure aborts with status 1.
make -j${BUILD_JOBS} ${VERBOSE} || {
    echo "Error: Build process failed."
    exit 1
}


# Run the test suite when tests are enabled. The variable must be expanded
# ("${WITH_TESTS}"): comparing the bare word WITH_TESTS with "yes" is always
# false, which silently skipped the tests.
if test "${WITH_TESTS}" = "yes";
then
    # A failing test run is only reported; as before, it does not change the
    # exit status of the script.
    if ! make -j"${BUILD_JOBS}" test; then
        echo "Error: Some test did not pass successfuly."
    fi
fi

exit 0
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ set( tnl_hamilton_jacobi_parallel_map_SOURCES
#     tnlParallelMapSolver2D_impl.h
#     tnlParallelMapSolver.h
#     parallelMapConfig.h 
#	  main.cu
     main.cpp)


+20 −21
Original line number Diff line number Diff line
@@ -86,7 +86,6 @@ int main( int argc, char* argv[] )
		   cout << "Starting solver loop..." << endl;
		   solver.run();
	   }
  // }
	}


+1 −148
Original line number Diff line number Diff line
@@ -129,10 +129,7 @@ public:
	int* calculationsCount_cuda;
	double* tmpw;
	double* tmp_map;
	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
	//SchemeDevice scheme_cuda;
	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;


	int* runcuda;
	int run_host;
@@ -146,10 +143,6 @@ public:

	__device__ void runSubgridCUDA2D( int boundaryCondition, double* u, int subGridID);

	/*__global__ void runCUDA();*/

	//__device__ void synchronizeCUDA();

	__device__ int getOwnerCUDA2D( int i) const;

	__device__ int getSubgridValueCUDA2D( int i ) const;
@@ -160,137 +153,17 @@ public:

	__device__ void setBoundaryConditionCUDA2D( int i, int value );

	//__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

	/*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/

#endif

};







	// Partial specialization of tnlParallelMapSolver for the template
	// arguments < 3, ..., double, int > (presumably: dimension 3, double as the
	// real type and int as the index type -- confirm against the primary
	// template, which is not visible here). Only declarations are given in
	// this block; the member definitions live elsewhere.
	template<typename SchemeHost, typename SchemeDevice, typename Device>
	class tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >
	{
	public:

		// Aliases for the template parameters and for the containers used by
		// the solver; all containers are instantiated for tnlHost.
		typedef SchemeDevice SchemeTypeDevice;
		typedef SchemeHost SchemeTypeHost;
		typedef Device DeviceType;
		typedef tnlVector< double, tnlHost, int > VectorType;
		typedef tnlVector< int, tnlHost, int > IntVectorType;
		typedef tnlGrid< 3, double, tnlHost, int > MeshType;
	#ifdef HAVE_CUDA
		// NOTE(review): the *CUDA aliases name exactly the same tnlHost-based
		// types as the aliases above.
		typedef tnlVector< double, tnlHost, int > VectorTypeCUDA;
		typedef tnlVector< int, tnlHost, int > IntVectorTypeCUDA;
		typedef tnlGrid< 3, double, tnlHost, int > MeshTypeCUDA;
#endif
		tnlParallelMapSolver();
		// Sets the solver up from the given parameter container; returns bool
		// (presumably success/failure -- confirm in the implementation).
		bool init( const tnlParameterContainer& parameters );
		void run();

		void test();

	/*private:*/
		// The private section is commented out, so everything below is public.


		void synchronize();

		int getOwner( int i) const;

		// Accessors for the per-subgrid bookkeeping values.
		int getSubgridValue( int i ) const;

		void setSubgridValue( int i, int value );

		int getBoundaryCondition( int i ) const;

		void setBoundaryCondition( int i, int value );

		void stretchGrid();

		void contractGrid();

		// Subgrid data is passed and returned by value as VectorType.
		VectorType getSubgrid( const int i ) const;

		void insertSubgrid( VectorType u, const int i );

		VectorType runSubgrid( int boundaryCondition, VectorType u, int subGridID);


		// Solver state.
		tnlMeshFunction<MeshType> u0;
		VectorType work_u;
		IntVectorType subgridValues, boundaryConditions, unusedCell, calculationsCount;
		MeshType mesh, subMesh;
		SchemeHost schemeHost;
		SchemeDevice schemeDevice;
		double delta, tau0, stopTime,cflCondition;
		int gridRows, gridCols, gridLevels, currentStep, n;

		// Timing data (presumably used to measure the run time -- confirm).
		std::clock_t start;
		double time_diff;


		tnlDeviceEnum device;

		// Returns the address of this object.
		tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* getSelf()
		{
			return this;
// The brace below closes the body of getSelf(), not the class; the ';' after
// it is an empty declaration.
};

#ifdef HAVE_CUDA

	// Members that exist only in CUDA builds.

	// Pointer to a solver object of the same type (presumably the copy that
	// resides in device memory -- TODO confirm).
	tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver;

	// Raw-pointer members named after the host members above (presumably
	// their device-side counterparts -- TODO confirm where they are allocated).
	double* work_u_cuda;

	int* subgridValues_cuda;
	int* boundaryConditions_cuda;
	int* unusedCell_cuda;
	int* calculationsCount_cuda;
	double* tmpw;
	//MeshTypeCUDA mesh_cuda, subMesh_cuda;
	//SchemeDevice scheme_cuda;
	//double delta_cuda, tau0_cuda, stopTime_cuda,cflCondition_cuda;
	//int gridRows_cuda, gridCols_cuda, currentStep_cuda, n_cuda;

	int* runcuda;
	int run_host;


	// __device__ counterparts of the host methods declared above; they take
	// and return raw double pointers / values instead of VectorType.
	__device__ void getSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);

	__device__ void updateSubgridCUDA3D( const int i, tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller, double* a);

	__device__ void insertSubgridCUDA3D( double u, const int i );

	__device__ void runSubgridCUDA3D( int boundaryCondition, double* u, int subGridID);

	/*__global__ void runCUDA();*/

	//__device__ void synchronizeCUDA();

	__device__ int getOwnerCUDA3D( int i) const;

	__device__ int getSubgridValueCUDA3D( int i ) const;

	__device__ void setSubgridValueCUDA3D( int i, int value );

	__device__ int getBoundaryConditionCUDA3D( int i ) const;

	__device__ void setBoundaryConditionCUDA3D( int i, int value );

	//__device__ bool initCUDA( tnlParallelMapSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

	/*__global__ void initRunCUDA(tnlParallelMapSolver<Scheme, double, tnlHost, int >* caller);*/

#endif

// End of the tnlParallelMapSolver< 3, ... > specialization.
};



@@ -313,26 +186,6 @@ __global__ void synchronizeCUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevi
// Forward declaration of a CUDA kernel (__global__) that receives a pointer
// to the 2D solver specialization; its definition is not part of this view.
template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void synchronize2CUDA2D(tnlParallelMapSolver<2, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);







// Forward declarations of the CUDA kernels (__global__) for the 3D solver
// specialization. Each takes a pointer to the solver object; the definitions
// are not part of this view.
template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void runCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);

template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void initRunCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* caller);

// Besides the solver pointer this kernel receives one double and two int
// pointers (their meaning is not visible here -- see the definition).
template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void initCUDA3D( tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, int * ptr2, int* ptr3);

template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void synchronizeCUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);

template <typename SchemeHost, typename SchemeDevice, typename Device>
__global__ void synchronize2CUDA3D(tnlParallelMapSolver<3, SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
#endif


Loading