Implementing test for CUDA memory operations. (dcf62dfd) · Commits · TNL / tnl-dev

buildAll

+2 −3

Original line number	Diff line number	Diff line
		@@ -9,7 +9,6 @@ VERBOSE=1

		CMAKE="cmake"
		CPUS=`grep -c processor /proc/cpuinfo`
		CPUS=1

		echo "Building $TARGET using $CPUS processors."

		@@ -26,10 +25,10 @@ cd Debug
		${CMAKE} .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR}
		make -j${CPUS} #VERBOSE=1
		make -j${CPUS} test
		make -j${CPUS} install
		#make -j${CPUS} install

		cd ../Release
		${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUSPARSE=${WITH_CUSPARSE} -DPETSC_DIR=${PETSC_DIR}
		make -j${CPUS} #VERBOSE=1
		make -j${CPUS} test
		make -j${CPUS} install
		#make -j${CPUS} install

src/core/CMakeLists.txt

+2 −1

Original line number	Diff line number	Diff line
		ADD_SUBDIRECTORY( cuda )

		set (headers tnlArray.h
		tnlAssert.h
		tnlCurve.h
		tnlCuda.h
		tnlCudaSupport.h
		tnlDataElement.h
		tnlDevice.h
		tnlFile.h

src/core/cuda/CMakeLists.txt

0 → 100755

+2 −0

Original line number	Diff line number	Diff line
		# List of the header files of this directory, collected in the variable `headers`.
		# NOTE(review): reduction.h is located in this directory as well but it is not
		# listed here - confirm whether this is intentional.
		# NOTE(review): a variable set here is visible only in this directory scope;
		# nothing in this file consumes `headers` - confirm how the list is meant to be used.
		set( headers device-check.h
		)

src/core/cuda/device-check.h

0 → 100644

+34 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		device-check.h - description
		-------------------
		begin : Mar 20, 2013
		copyright : (C) 2013 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/***************************************************************************
		* *
		* This program is free software; you can redistribute it and/or modify *
		* it under the terms of the GNU General Public License as published by *
		* the Free Software Foundation; either version 2 of the License, or *
		* (at your option) any later version. *
		* *
		***************************************************************************/

		// NOTE(review): the include guard name (ERROR_CHECK_H_) does not follow the
		// file name device-check.h - consider renaming it to avoid clashes.
		#ifndef ERROR_CHECK_H_
		#define ERROR_CHECK_H_

		#include <iostream>

		// The CUDA headers are included only when the build defines HAVE_CUDA.
		#ifdef HAVE_CUDA
		#include <cuda_runtime.h>
		#include <cuda.h>
		#endif

		// NOTE(review): a using-directive in a header injects the namespace std into
		// every file including this header. It is kept here because the including
		// files may rely on it - confirm before removing.
		using namespace std;

		/***
		* Convenience macro expanding to a call of __checkCudaDevice with the name of
		* the source file and the line number of the place where the macro is used.
		*/
		#define checkCudaDevice __checkCudaDevice( __FILE__, __LINE__ )

		/***
		* This function is only declared here, its definition is not part of this header.
		* @param file_name is the name of the source file, the macro checkCudaDevice
		* passes __FILE__ here.
		* @param line is the line number, the macro checkCudaDevice passes __LINE__ here.
		* NOTE(review): presumably it reports a pending CUDA error together with the
		* given location and returns true if there was no error - confirm against
		* the definition.
		* NOTE(review): identifiers containing a double underscore are reserved for
		* the C++ implementation.
		*/
		bool __checkCudaDevice( const char* file_name, int line );

		#endif /* ERROR_CHECK_H_ */

src/core/cuda/reduction.h

0 → 100644

+53 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		reduction.h - description
		-------------------
		begin : Oct 28, 2010
		copyright : (C) 2010 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/***************************************************************************
		* *
		* This program is free software; you can redistribute it and/or modify *
		* it under the terms of the GNU General Public License as published by *
		* the Free Software Foundation; either version 2 of the License, or *
		* (at your option) any later version. *
		* *
		***************************************************************************/

		#ifndef CUDALONGVECTORKERNELS_H_
		#define CUDALONGVECTORKERNELS_H_

		#ifdef HAVE_CUDA
		#include <cuda.h>
		#endif
		#include <iostream>

		/***
		* Declaration of the template calling the final CUDA kernel for the single vector reduction.
		* The template parameters are:
		* @param Type is the type of data we want to reduce.
		* @param ParameterType is the type of the additional argument 'parameter'.
		* @param Index is the type of the argument 'size'.
		* @param operation is the operation reducing the data.
		* It can be tnlParallelReductionSum, tnlParallelReductionMin or tnlParallelReductionMax.
		* NOTE(review): tnlTupleOperation is neither declared nor included in this header,
		* it must be declared before this header is included - confirm.
		* The function parameters:
		* @param size tells the number of elements in the data array.
		* @param deviceInput1 is the pointer to an array storing the data we want
		* to reduce. This array must stay on the device!
		* @param deviceInput2 is the pointer to an array storing the coupling data, for example
		* the second vector for the SDOT operation. This array must stay on the device!
		* @param result will contain the result of the reduction if everything was ok
		* and the return code is true.
		* @param parameter can be used, for example, for passing the parameter p of the Lp norm.
		* @param deviceAux is an auxiliary array used to store temporary data during the reduction.
		* It defaults to 0 (null pointer). If one calls this function more than once, one might
		* provide this array to avoid repetitive allocation of this array on the device inside
		* of this function.
		* The size of this array should be size / 128 * sizeof( Type ).
		*/
		template< typename Type, typename ParameterType, typename Index, tnlTupleOperation operation >
		bool tnlCUDALongVectorReduction( const Index size,
		const Type* deviceInput1,
		const Type* deviceInput2,
		Type& result,
		const ParameterType& parameter,
		Type* deviceAux = 0 );
		#endif /* CUDALONGVECTORKERNELS_H_ */