Making TNL objects accessible in CUDA kernels. (03332aa8) · Commits · TNL / tnl-dev

CMakeLists.txt

+10 −16

Original line number	Diff line number	Diff line
		cmake_minimum_required( VERSION 2.8 )
		cmake_minimum_required( VERSION 2.8.10 )

		project( tnl )

		@@ -33,6 +33,7 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" )
		endif()

		if( WITH_CUDA STREQUAL "yes" )
		#AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 --relocatable-device-code=yes --device-c" )
		AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" )
		else()
		AddCompilerFlag( "-std=gnu++0x" )
		@@ -45,21 +46,14 @@ if( NOT WITH_CUDA STREQUAL "no" )
		find_package( CUDA )
		if( CUDA_FOUND )
		set( BUILD_CUDA TRUE)
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
		if( CUDA_ARCHITECTURE STREQUAL "1.0" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10)
		endif()
		if( CUDA_ARCHITECTURE STREQUAL "1.1" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_11;-DCUDA_ARCH=11)
		endif()
		if( CUDA_ARCHITECTURE STREQUAL "1.2" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_12;-DCUDA_ARCH=12)
		endif()
		if( CUDA_ARCHITECTURE STREQUAL "1.3" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_13;-DCUDA_ARCH=13)
		endif()
		set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
		set(BUILD_SHARED_LIBS ON)
		set(CUDA_SEPARABLE_COMPILATION ON)

		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--shared;--compiler-options -fPIC)
		if( CUDA_ARCHITECTURE STREQUAL "2.0" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_20;-DCUDA_ARCH=20)
		#set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_20;-DCUDA_ARCH=20)
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)
		endif()
		if( CUDA_ARCHITECTURE STREQUAL "2.1" )
		set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21)

install

+1 −2

Original line number	Diff line number	Diff line
		@@ -6,12 +6,11 @@ WITH_CUDA=yes
		WITH_CUSPARSE=no
		CUDA_ARCHITECTURE=2.0
		TEMPLATE_EXPLICIT_INSTANTIATION=yes
		#VERBOSE="VERBOSE=1"
		VERBOSE="VERBOSE=1"

		CMAKE="cmake"
		CPUS=`grep -c processor /proc/cpuinfo`

		CPUS="1"
		echo "Building $TARGET using $CPUS processors."

		if [ ! -d Debug ];

src/CMakeLists.txt

+4 −4

Original line number	Diff line number	Diff line
		@@ -29,8 +29,8 @@ set( tnl_CUDA__SOURCES ${tnl_generators_CUDA__SOURCES}


		if( BUILD_CUDA )
		CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED
		${tnl_CUDA__SOURCES} )
		CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES}
		OPTIONS -arch sm_20 )
		else( BUILD_CUDA )
		ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED
		${tnl_SOURCES} )
		@@ -46,8 +46,8 @@ INSTALL( TARGETS tnl${debugExt}-${tnlVersion} DESTINATION lib )
		IF( BUILD_MPI )

		if( BUILD_CUDA )
		CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED
		${tnl_CUDA__SOURCES} )
		CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES}
		OPTIONS -arch sm_20 )
		else( BUILD_CUDA )
		ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED
		${tnl_SOURCES} )

src/core/arrays/tnlArray.h

+15 −0

Original line number	Diff line number	Diff line
		@@ -55,14 +55,23 @@ class tnlArray : public virtual tnlObject

		void reset();

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Index getSize() const;

		void setElement( const Index i, const Element& x );

		Element getElement( Index i ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element& operator[] ( Index i );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element& operator[] ( Index i ) const;

		tnlArray< Element, Device, Index >& operator = ( const tnlArray< Element, Device, Index >& array );
		@@ -78,8 +87,14 @@ class tnlArray : public virtual tnlObject

		void setValue( const Element& e );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element* getData() const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element* getData();

		/*!

src/core/arrays/tnlMultiArray.h

+64 −7

Original line number	Diff line number	Diff line
		@@ -48,34 +48,46 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev

		bool setDimensions( const tnlTuple< 1, Index >& dimensions );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		void getDimensions( Index& iSize ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const tnlTuple< 1, Index >& getDimensions() const;

		//! Set dimensions of the array using another array as a template
		template< typename MultiArray >
		bool setLike( const MultiArray& v );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Index getElementIndex( const Index i ) const;

		void setElement( const Index i, Element value );

		//! This method can be used for general access to the elements of the arrays.
		/*! It does not return reference but value. So it can be used to access
		* arrays in different adress space (usualy GPU device).
		* arrays in different address space (usually GPU device).
		* See also operator().
		*/
		Element getElement( const Index i ) const;

		//! Operator for accessing elements of the array.
		/*! It returns reference to given elements so it cannot be
		* used to access elements of arrays in different address space
		* (GPU device usually).
		*/
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element& operator()( const Index i );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element& operator()( const Index i ) const;


		template< typename MultiArray >
		bool operator == ( const MultiArray& array ) const;

		@@ -121,14 +133,23 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev

		bool setDimensions( const tnlTuple< 2, Index >& dimensions );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		void getDimensions( Index& jSize, Index& iSize ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const tnlTuple< 2, Index >& getDimensions() const;

		//! Set dimensions of the array using another array as a template
		template< typename MultiArray >
		bool setLike( const MultiArray& v );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Index getElementIndex( const Index j, const Index i ) const;

		void setElement( const Index j, const Index i, Element value );
		@@ -142,11 +163,17 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev

		//! Operator for accessing elements of the array.
		/*! It returns reference to given elements so it cannot be
		* used to access elements of arrays in different adress space
		* (GPU device usualy).
		* used to access elements of arrays in different address space
		* (GPU device usually).
		*/
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element& operator()( const Index j, const Index i );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element& operator()( const Index j, const Index i ) const;

		template< typename MultiArray >
		@@ -195,14 +222,23 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev

		bool setDimensions( const tnlTuple< 3, Index >& dimensions );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		void getDimensions( Index& k, Index& j, Index& iSize ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const tnlTuple< 3, Index >& getDimensions() const;

		//! Set dimensions of the array using another array as a template
		template< typename MultiArray >
		bool setLike( const MultiArray& v );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Index getElementIndex( const Index k, const Index j, const Index i ) const;

		void setElement( const Index k, const Index j, const Index i, Element value );
		@@ -219,8 +255,14 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev
		* used to access elements of arrays in different address space
		* (GPU device usually).
		*/
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element& operator()( const Index k, const Index j, const Index i );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element& operator()( const Index k, const Index j, const Index i ) const;

		template< typename MultiArray >
		@@ -269,14 +311,23 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev

		bool setDimensions( const tnlTuple< 4, Index >& dimensions );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const tnlTuple< 4, Index >& getDimensions() const;

		//! Set dimensions of the array using another array as a template
		template< typename MultiArray >
		bool setLike( const MultiArray& v );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const;

		void setElement( const Index l, const Index k, const Index j, const Index i, Element value );
		@@ -293,8 +344,14 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev
		* used to access elements of arrays in different address space
		* (GPU device usually).
		*/
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Element& operator()( const Index l, const Index k, const Index j, const Index i );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const;

		template< typename MultiArray >