Implementing the CUDA support for the dense matrix format. (5238ed66) · Commits · TNL / tnl-dev

CMakeLists.txt

+1 −1

Original line number	Diff line number	Diff line
		@@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" )
		set(BUILD_SHARED_LIBS ON)
		set(CUDA_SEPARABLE_COMPILATION ON)

		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep)
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA)
		if( CUDA_ARCHITECTURE STREQUAL "2.0" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)
		endif()

+1 −0

Original line number	Diff line number	Diff line
		@@ -24,6 +24,7 @@

		/***
		* This function returns the minimum of two numbers stored on the device.
		* TODO: Make it tnlMin, tnlMax etc.
		*/
		template< class T > __device__ T tnlCudaMin( const T& a,
		const T& b )

+11 −4

Original line number	Diff line number	Diff line
		@@ -45,6 +45,17 @@ class tnlCuda
		#endif
		static inline int getMaxBlockSize();

		// Returns the warp size as an int. The function is marked
		// __host__ __device__ only when HAVE_CUDA is defined, so the
		// declaration still compiles in builds without CUDA.
		#ifdef HAVE_CUDA
		__host__ __device__
		#endif
		static inline int getWarpSize();


		// Returns the number of shared memory banks as an int. The function is
		// marked __host__ __device__ only when HAVE_CUDA is defined, so the
		// declaration still compiles in builds without CUDA.
		#ifdef HAVE_CUDA
		__host__ __device__
		#endif
		static inline int getNumberOfSharedMemoryBanks();

		static int getGPUTransferBufferSize();

		static size_t getFreeMemory();
		@@ -56,10 +67,6 @@ class tnlCuda
		static void freeFromDevice( ObjectType* object );

		#ifdef HAVE_CUDA
		// Returns the number of shared memory banks; usable from host and device code.
		static inline __host__ __device__ int getNumberOfSharedMemoryBanks();

		// Returns the warp size; usable from host and device code.
		static inline __host__ __device__ int getWarpSize();

		template< typename Index >
		static __device__ Index getInterleaving( const Index index );
		#endif

+0 −2

Original line number	Diff line number	Diff line
		@@ -176,7 +176,6 @@ Element& tnlArray< Element, Device, Index > :: operator[] ( Index i )
		<< " index is " << i
		<< " and array size is " << this -> getSize() );
		return this->data[ i ];
		//return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i );
		};

		template< typename Element,
		@@ -193,7 +192,6 @@ const Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) cons
		<< " index is " << i
		<< " and array size is " << this -> getSize() );
		return this->data[ i ];
		//return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i );
		};

		template< typename Element,

+18 −10

Original line number	Diff line number	Diff line
		@@ -46,6 +46,24 @@ inline int tnlCuda::getMaxBlockSize()
		return 1024;
		};

		#ifdef HAVE_CUDA
		__host__ __device__
		#endif
		inline int tnlCuda::getWarpSize()
		{
		   // The warp size is a fixed value here. With HAVE_CUDA defined the
		   // function is callable from both host and device code.
		   // TODO: make it a preprocessor macro constant defined in tnlConfig
		   const int threadsPerWarp = 32;
		   return threadsPerWarp;
		}

		#ifdef HAVE_CUDA
		__host__ __device__
		#endif
		inline int tnlCuda::getNumberOfSharedMemoryBanks()
		{
		   // The bank count is a fixed value here. With HAVE_CUDA defined the
		   // function is callable from both host and device code.
		   // TODO: make it a preprocessor macro constant defined in tnlConfig
		   const int sharedMemoryBanks = 32;
		   return sharedMemoryBanks;
		}


		template< typename ObjectType >
		ObjectType* tnlCuda::passToDevice( const ObjectType& object )
		@@ -76,16 +94,6 @@ void tnlCuda::freeFromDevice( ObjectType* deviceObject )
		checkCudaDevice;
		}

		inline __host__ __device__ int tnlCuda::getNumberOfSharedMemoryBanks()
		{
		   // Number of shared memory banks, fixed at 32.
		   const int sharedMemoryBanks = 32;
		   return sharedMemoryBanks;
		}

		inline __host__ __device__ int tnlCuda::getWarpSize()
		{
		   // Warp size, fixed at 32.
		   const int threadsPerWarp = 32;
		   return threadsPerWarp;
		}

		template< typename Index >
		__device__ Index tnlCuda::getInterleaving( const Index index )
		{