Commit 5238ed66 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Implementing the CUDA support for the dense matrix format.

parent d5a890c8
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" )
        set(BUILD_SHARED_LIBS ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
        
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep)
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA)
        if( CUDA_ARCHITECTURE STREQUAL "2.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)            
        endif()    
+1 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@

/***
 * This function returns minimum of two numbers stored on the device.
 * TODO: Make it tnlMin, tnlMax etc.
 */
template< class T > __device__ T tnlCudaMin( const T& a,
                                             const T& b )
+11 −4
Original line number Diff line number Diff line
@@ -45,6 +45,17 @@ class tnlCuda
#endif
   static inline int getMaxBlockSize();

#ifdef HAVE_CUDA
   __host__ __device__
#endif
// Accessor for the warp size; it carries the __host__ __device__ qualifiers
// only when HAVE_CUDA is defined, so the declaration also builds without CUDA.
static inline int getWarpSize();


#ifdef HAVE_CUDA
   __host__ __device__
#endif
   // Accessor for the number of shared memory banks; it carries the
   // __host__ __device__ qualifiers only when HAVE_CUDA is defined, so the
   // declaration also builds without CUDA.
   static inline int getNumberOfSharedMemoryBanks();

   static int getGPUTransferBufferSize();

   static size_t getFreeMemory();
@@ -56,10 +67,6 @@ class tnlCuda
   static void freeFromDevice( ObjectType* object );

#ifdef HAVE_CUDA
   static inline __host__ __device__ int getNumberOfSharedMemoryBanks();

   static inline __host__ __device__ int getWarpSize();

   template< typename Index >
   static __device__ Index getInterleaving( const Index index );
#endif
+0 −2
Original line number Diff line number Diff line
@@ -176,7 +176,6 @@ Element& tnlArray< Element, Device, Index > :: operator[] ( Index i )
                   << " index is " << i
                   << " and array size is " << this -> getSize() );
   return this->data[ i ];
   //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i );
};

template< typename Element,
@@ -193,7 +192,6 @@ const Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) cons
                   << " index is " << i
                   << " and array size is " << this -> getSize() );
   return this->data[ i ];
   //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i );
};

template< typename Element,
+18 −10
Original line number Diff line number Diff line
@@ -46,6 +46,24 @@ inline int tnlCuda::getMaxBlockSize()
   return 1024;
};

/***
 * Returns the value this library uses as the warp size.
 * The value is a fixed constant (32); nothing is queried from the device.
 * The function is qualified __host__ __device__ only when HAVE_CUDA is defined.
 */
#ifdef HAVE_CUDA
__host__ __device__
#endif
inline int tnlCuda::getWarpSize()
{
   // TODO: make it preprocessor macro constant defined in tnlConfig
   const int threadsPerWarp = 32;
   return threadsPerWarp;
}

/***
 * Returns the value this library uses as the number of shared memory banks.
 * The value is a fixed constant (32); nothing is queried from the device.
 * The function is qualified __host__ __device__ only when HAVE_CUDA is defined.
 */
#ifdef HAVE_CUDA
__host__ __device__
#endif
inline int tnlCuda::getNumberOfSharedMemoryBanks()
{
   // TODO: make it preprocessor macro constant defined in tnlConfig
   const int sharedMemoryBanks = 32;
   return sharedMemoryBanks;
}


template< typename ObjectType >
ObjectType* tnlCuda::passToDevice( const ObjectType& object )
@@ -76,16 +94,6 @@ void tnlCuda::freeFromDevice( ObjectType* deviceObject )
   checkCudaDevice;
}

// Returns the hard-coded number of shared memory banks (32).
// NOTE(review): a definition of the same function wrapped in an HAVE_CUDA
// guard appears earlier in this diff; presumably this unguarded variant is
// the one being removed -- confirm before keeping both.
inline __host__ __device__ int tnlCuda::getNumberOfSharedMemoryBanks()
{
   return 32;
}

// Returns the hard-coded warp size (32).
// NOTE(review): a definition of the same function wrapped in an HAVE_CUDA
// guard appears earlier in this diff; presumably this unguarded variant is
// the one being removed -- confirm before keeping both.
inline __host__ __device__ int tnlCuda::getWarpSize()
{
   return 32;
}

template< typename Index >
__device__ Index tnlCuda::getInterleaving( const Index index )
{
Loading