Loading CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() Loading src/core/cuda/reduction-operations.h +1 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,7 @@ /*** * This function returns minimum of two numbers stored on the device. * TODO: Make it tnlMin, tnlMax etc. */ template< class T > __device__ T tnlCudaMin( const T& a, const T& b ) Loading src/core/tnlCuda.h +11 −4 Original line number Diff line number Diff line Loading @@ -45,6 +45,17 @@ class tnlCuda #endif static inline int getMaxBlockSize(); #ifdef HAVE_CUDA __host__ __device__ #endif static inline int getWarpSize(); #ifdef HAVE_CUDA __host__ __device__ #endif static inline int getNumberOfSharedMemoryBanks(); static int getGPUTransferBufferSize(); static size_t getFreeMemory(); Loading @@ -56,10 +67,6 @@ class tnlCuda static void freeFromDevice( ObjectType* object ); #ifdef HAVE_CUDA static inline __host__ __device__ int getNumberOfSharedMemoryBanks(); static inline __host__ __device__ int getWarpSize(); template< typename Index > static __device__ Index getInterleaving( const Index index ); #endif Loading src/implementation/core/arrays/tnlArray_impl.h +0 −2 Original line number Diff line number Diff line Loading @@ -176,7 +176,6 @@ Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) << " index is " << i << " and array size is " << this -> getSize() ); return this->data[ i ]; //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i ); }; template< typename Element, Loading @@ -193,7 +192,6 @@ const Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) cons << " index is " << i << " and array size is " << this -> getSize() ); return this->data[ i ]; //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i ); }; template< typename Element, Loading src/implementation/core/tnlCuda_impl.h +18 −10 Original line number Diff line number Diff line Loading @@ -46,6 +46,24 @@ inline int tnlCuda::getMaxBlockSize() return 1024; }; #ifdef HAVE_CUDA __host__ __device__ #endif inline int tnlCuda::getWarpSize() { // TODO: make it preprocessor macro constant defined in tnlConfig return 32; } #ifdef HAVE_CUDA __host__ __device__ #endif inline int tnlCuda::getNumberOfSharedMemoryBanks() { // TODO: make it preprocessor macro constant defined in tnlConfig return 32; } template< typename ObjectType > ObjectType* tnlCuda::passToDevice( const ObjectType& object ) Loading Loading @@ -76,16 +94,6 @@ void tnlCuda::freeFromDevice( ObjectType* deviceObject ) checkCudaDevice; } inline __host__ __device__ int tnlCuda::getNumberOfSharedMemoryBanks() { return 32; } inline __host__ __device__ int tnlCuda::getWarpSize() { return 32; } template< typename Index > __device__ Index tnlCuda::getInterleaving( const Index index ) { Loading Loading
CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() Loading
src/core/cuda/reduction-operations.h +1 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,7 @@ /*** * This function returns minimum of two numbers stored on the device. * TODO: Make it tnlMin, tnlMax etc. */ template< class T > __device__ T tnlCudaMin( const T& a, const T& b ) Loading
src/core/tnlCuda.h +11 −4 Original line number Diff line number Diff line Loading @@ -45,6 +45,17 @@ class tnlCuda #endif static inline int getMaxBlockSize(); #ifdef HAVE_CUDA __host__ __device__ #endif static inline int getWarpSize(); #ifdef HAVE_CUDA __host__ __device__ #endif static inline int getNumberOfSharedMemoryBanks(); static int getGPUTransferBufferSize(); static size_t getFreeMemory(); Loading @@ -56,10 +67,6 @@ class tnlCuda static void freeFromDevice( ObjectType* object ); #ifdef HAVE_CUDA static inline __host__ __device__ int getNumberOfSharedMemoryBanks(); static inline __host__ __device__ int getWarpSize(); template< typename Index > static __device__ Index getInterleaving( const Index index ); #endif Loading
src/implementation/core/arrays/tnlArray_impl.h +0 −2 Original line number Diff line number Diff line Loading @@ -176,7 +176,6 @@ Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) << " index is " << i << " and array size is " << this -> getSize() ); return this->data[ i ]; //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i ); }; template< typename Element, Loading @@ -193,7 +192,6 @@ const Element& tnlArray< Element, Device, Index > :: operator[] ( Index i ) cons << " index is " << i << " and array size is " << this -> getSize() ); return this->data[ i ]; //return tnlArrayOperations< Device > :: getArrayElementReference( this -> data, i ); }; template< typename Element, Loading
src/implementation/core/tnlCuda_impl.h +18 −10 Original line number Diff line number Diff line Loading @@ -46,6 +46,24 @@ inline int tnlCuda::getMaxBlockSize() return 1024; }; #ifdef HAVE_CUDA __host__ __device__ #endif inline int tnlCuda::getWarpSize() { // TODO: make it preprocessor macro constant defined in tnlConfig return 32; } #ifdef HAVE_CUDA __host__ __device__ #endif inline int tnlCuda::getNumberOfSharedMemoryBanks() { // TODO: make it preprocessor macro constant defined in tnlConfig return 32; } template< typename ObjectType > ObjectType* tnlCuda::passToDevice( const ObjectType& object ) Loading Loading @@ -76,16 +94,6 @@ void tnlCuda::freeFromDevice( ObjectType* deviceObject ) checkCudaDevice; } inline __host__ __device__ int tnlCuda::getNumberOfSharedMemoryBanks() { return 32; } inline __host__ __device__ int tnlCuda::getWarpSize() { return 32; } template< typename Index > __device__ Index tnlCuda::getInterleaving( const Index index ) { Loading