Commit 03332aa8 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Making TNL objects accessible in CUDA kernels.

parent f2cb41af
Loading
Loading
Loading
Loading
+10 −16
Original line number Diff line number Diff line
cmake_minimum_required( VERSION 2.8 )
cmake_minimum_required( VERSION 2.8.10 )

project( tnl )

@@ -33,6 +33,7 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" )
endif()   

if( WITH_CUDA STREQUAL "yes" )
   #AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 --relocatable-device-code=yes --device-c" )
   AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" )
else()
   AddCompilerFlag( "-std=gnu++0x" )
@@ -45,21 +46,14 @@ if( NOT WITH_CUDA STREQUAL "no" )
    find_package( CUDA )
    if( CUDA_FOUND )
        set( BUILD_CUDA TRUE)
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
        if( CUDA_ARCHITECTURE STREQUAL "1.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10)
        endif()
        if( CUDA_ARCHITECTURE STREQUAL "1.1" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_11;-DCUDA_ARCH=11)
        endif()    
        if( CUDA_ARCHITECTURE STREQUAL "1.2" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_12;-DCUDA_ARCH=12)
        endif()    
        if( CUDA_ARCHITECTURE STREQUAL "1.3" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_13;-DCUDA_ARCH=13)
        endif()    
        set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
        set(BUILD_SHARED_LIBS ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
        
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--shared;--compiler-options -fPIC)
        if( CUDA_ARCHITECTURE STREQUAL "2.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_20;-DCUDA_ARCH=20)
            #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_20;-DCUDA_ARCH=20)
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)            
        endif()    
        if( CUDA_ARCHITECTURE STREQUAL "2.1" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21)
+1 −2
Original line number Diff line number Diff line
@@ -6,12 +6,11 @@ WITH_CUDA=yes
WITH_CUSPARSE=no
CUDA_ARCHITECTURE=2.0
TEMPLATE_EXPLICIT_INSTANTIATION=yes
#VERBOSE="VERBOSE=1"
VERBOSE="VERBOSE=1"

CMAKE="cmake"
CPUS=`grep -c processor /proc/cpuinfo`

CPUS="1"
echo "Building $TARGET using $CPUS processors."

if [ ! -d Debug ];
+4 −4
Original line number Diff line number Diff line
@@ -29,8 +29,8 @@ set( tnl_CUDA__SOURCES ${tnl_generators_CUDA__SOURCES}
                 
                 
if( BUILD_CUDA )
   CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED 
                     ${tnl_CUDA__SOURCES} )
   CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES}
                                                  OPTIONS -arch sm_20 )
else( BUILD_CUDA )
   ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED 
                ${tnl_SOURCES} )
@@ -46,8 +46,8 @@ INSTALL( TARGETS tnl${debugExt}-${tnlVersion} DESTINATION lib )
IF( BUILD_MPI )
   
   if( BUILD_CUDA )
      CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED
                        ${tnl_CUDA__SOURCES} )
      CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} 
                                                         OPTIONS -arch sm_20 )
   else( BUILD_CUDA )
         ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED
                      ${tnl_SOURCES} )  
+15 −0
Original line number Diff line number Diff line
@@ -55,14 +55,23 @@ class tnlArray : public virtual tnlObject

   void reset();

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Index getSize() const;

   void setElement( const Index i, const Element& x );

   Element getElement( Index i ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element& operator[] ( Index i );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element& operator[] ( Index i ) const;

   tnlArray< Element, Device, Index >& operator = ( const tnlArray< Element, Device, Index >& array );
@@ -78,8 +87,14 @@ class tnlArray : public virtual tnlObject

   void setValue( const Element& e );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element* getData() const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element* getData();

   /*!
+64 −7
Original line number Diff line number Diff line
@@ -48,34 +48,46 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev

   bool setDimensions( const tnlTuple< 1, Index >& dimensions );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   void getDimensions( Index& iSize ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const tnlTuple< 1, Index >& getDimensions() const;

   //! Set dimensions of the array using another array as a template
   template< typename MultiArray >
   bool setLike( const MultiArray& v );
   
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Index getElementIndex( const Index i ) const;

   void setElement( const Index i, Element value );

   //! This method can be used for general access to the elements of the arrays.
   /*! It does not return reference but value. So it can be used to access
    *  arrays in different adress space (usualy GPU device).
    *  arrays in different address space (usually GPU device).
    *  See also operator().
    */
   Element getElement( const Index i ) const;

   //! Operator for accessing elements of the array.
   /*! It returns reference to given elements so it cannot be
    *  used to access elements of arrays in different address space
    *  (GPU device usually).
    */
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element& operator()( const Index i );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element& operator()( const Index i ) const;


   template< typename MultiArray >
   bool operator == ( const MultiArray& array ) const;

@@ -121,14 +133,23 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev

   bool setDimensions( const tnlTuple< 2, Index >& dimensions );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   void getDimensions( Index& jSize, Index& iSize ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const tnlTuple< 2, Index >& getDimensions() const;

   //! Set dimensions of the array using another array as a template
   template< typename MultiArray >
   bool setLike( const MultiArray& v );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Index getElementIndex( const Index j, const Index i ) const;

   void setElement( const Index j, const Index i, Element value );
@@ -142,11 +163,17 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev

   //! Operator for accessing elements of the array.
   /*! It returns reference to given elements so it cannot be
    *  used to access elements of arrays in different adress space
    *  (GPU device usualy).
    *  used to access elements of arrays in different address space
    *  (GPU device usually).
    */
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element& operator()( const Index j, const Index i );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element& operator()( const Index j, const Index i ) const;

   template< typename MultiArray >
@@ -195,14 +222,23 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev

   bool setDimensions( const tnlTuple< 3, Index >& dimensions );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   void getDimensions( Index& k, Index& j, Index& iSize ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const tnlTuple< 3, Index >& getDimensions() const;

   //! Set dimensions of the array using another array as a template
   template< typename MultiArray >
   bool setLike( const MultiArray& v );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Index getElementIndex( const Index k, const Index j, const Index i ) const;

   void setElement( const Index k, const Index j, const Index i, Element value );
@@ -219,8 +255,14 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev
    *  used to access elements of arrays in different address space
    *  (GPU device usually).
    */
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element& operator()( const Index k, const Index j, const Index i );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element& operator()( const Index k, const Index j, const Index i ) const;

   template< typename MultiArray >
@@ -269,14 +311,23 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev

   bool setDimensions( const tnlTuple< 4, Index >& dimensions );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const tnlTuple< 4, Index >& getDimensions() const;

   //! Set dimensions of the array using another array as a template
   template< typename MultiArray >
   bool setLike( const MultiArray& v );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const;

   void setElement( const Index l, const Index k, const Index j, const Index i, Element value );
@@ -293,8 +344,14 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev
    *  used to access elements of arrays in different address space
    *  (GPU device usually).
    */
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   Element& operator()( const Index l, const Index k, const Index j, const Index i );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const;

   template< typename MultiArray >
Loading