Loading configure.ac +1 −1 Original line number Diff line number Diff line Loading @@ -44,7 +44,7 @@ then STORE_CPPFLAGS=$CPPFLAGS STORE_LDFLAGS=$LDFLAGS CUDA_LDFLAGS="-L$CUDA_LIBS" CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA" CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA -arch sm_13" CXXFLAGS="$CXXFLAGS -I$CUDA_HEADERS" CPPFLAGS="$CPPFLAGS -I$CUDA_HEADERS" LDFLAGS="$LDFLAGS $CUDA_LDFLAGS" Loading src/Makefile.am +1 −1 Original line number Diff line number Diff line Loading @@ -47,7 +47,7 @@ libtnl_mpi_dbg_0_1_la_LIBADD = debug/libtnldebug-mpi-dbg-0.1.la \ matrix/libtnlmatrix-mpi-dbg-0.1.la endif tnl_unit_tests_sources = tnl-unit-tests.cpp tnl_unit_tests_sources = tnl-unit-tests.cu check_PROGRAMS = tnl-unit-tests tnl_unit_tests_SOURCES = $(tnl_unit_tests_sources) Loading src/core/tnlLongVector.h +1 −0 Original line number Diff line number Diff line Loading @@ -146,6 +146,7 @@ template< class T > class tnlLongVector : public tnlObject { assert( long_vector. GetSize() == GetSize() ); memcpy( data, long_vector. Data(), GetSize() * sizeof( T ) ); return true; }; #ifdef HAVE_CUDA Loading src/core/tnlLongVectorCUDA.h +19 −9 Original line number Diff line number Diff line Loading @@ -35,10 +35,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject tnlLongVectorCUDA( int _size = 0 ) : size( _size ), shared_data( false ) { cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ); if( ! data ) if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; abort(); } //data ++; Loading @@ -48,10 +48,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject tnlLongVectorCUDA( const tnlLongVectorCUDA& v ) : tnlObject( v ), size( v. size ), shared_data( false ) { cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ); if( ! data ) if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; abort(); } //data ++; Loading @@ -69,14 +69,14 @@ template< class T > class tnlLongVectorCUDA : public tnlObject if( ! shared_data ) { cudaFree( data ); data = 0; data = NULL; } size = _size; cudaMalloc( ( void** ) &data, size * sizeof( T ) ); shared_data = false; if( ! data ) if( cudaMalloc( ( void** ) &data, size * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; size = 0; return false; } Loading Loading @@ -124,7 +124,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject bool copyFrom( const tnlLongVector< T >& long_vector ) { assert( long_vector. GetSize() == GetSize() ); cudaMemcpy( data, long_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyHostToDevice ); if( cudaMemcpy( data, long_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyHostToDevice ) != cudaSuccess ) { cerr << "Transfer of data from CUDA host ( " << long_vector. GetName() << " ) to CUDA device ( " << GetName() << " ) failed." << endl; return false; } return true; } Loading @@ -148,7 +153,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject template< class T > bool tnlLongVector< T > :: copyFrom( const tnlLongVectorCUDA< T >& cuda_vector ) { assert( cuda_vector. GetSize() == GetSize() ); cudaMemcpy( data, cuda_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyDeviceToHost ); if( cudaMemcpy( data, cuda_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyDeviceToHost ) != cudaSuccess ) { cerr << "Transfer of data from CUDA device ( " << cuda_vector. GetName() << " ) to CUDA host ( " << GetName() << " ) failed." << endl; return false; } return true; } Loading src/core/tnlLongVectorCUDATester.cu +1 −1 Original line number Diff line number Diff line Loading @@ -7,6 +7,6 @@ void testKernelStarter( const float& number, const int size ) void testKernelStarter( const double& number, const int size ) { testKernel( number, size ); // testKernel( number, size ); } Loading
configure.ac +1 −1 Original line number Diff line number Diff line Loading @@ -44,7 +44,7 @@ then STORE_CPPFLAGS=$CPPFLAGS STORE_LDFLAGS=$LDFLAGS CUDA_LDFLAGS="-L$CUDA_LIBS" CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA" CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA -arch sm_13" CXXFLAGS="$CXXFLAGS -I$CUDA_HEADERS" CPPFLAGS="$CPPFLAGS -I$CUDA_HEADERS" LDFLAGS="$LDFLAGS $CUDA_LDFLAGS" Loading
src/Makefile.am +1 −1 Original line number Diff line number Diff line Loading @@ -47,7 +47,7 @@ libtnl_mpi_dbg_0_1_la_LIBADD = debug/libtnldebug-mpi-dbg-0.1.la \ matrix/libtnlmatrix-mpi-dbg-0.1.la endif tnl_unit_tests_sources = tnl-unit-tests.cpp tnl_unit_tests_sources = tnl-unit-tests.cu check_PROGRAMS = tnl-unit-tests tnl_unit_tests_SOURCES = $(tnl_unit_tests_sources) Loading
src/core/tnlLongVector.h +1 −0 Original line number Diff line number Diff line Loading @@ -146,6 +146,7 @@ template< class T > class tnlLongVector : public tnlObject { assert( long_vector. GetSize() == GetSize() ); memcpy( data, long_vector. Data(), GetSize() * sizeof( T ) ); return true; }; #ifdef HAVE_CUDA Loading
src/core/tnlLongVectorCUDA.h +19 −9 Original line number Diff line number Diff line Loading @@ -35,10 +35,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject tnlLongVectorCUDA( int _size = 0 ) : size( _size ), shared_data( false ) { cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ); if( ! data ) if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; abort(); } //data ++; Loading @@ -48,10 +48,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject tnlLongVectorCUDA( const tnlLongVectorCUDA& v ) : tnlObject( v ), size( v. size ), shared_data( false ) { cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ); if( ! data ) if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; abort(); } //data ++; Loading @@ -69,14 +69,14 @@ template< class T > class tnlLongVectorCUDA : public tnlObject if( ! shared_data ) { cudaFree( data ); data = 0; data = NULL; } size = _size; cudaMalloc( ( void** ) &data, size * sizeof( T ) ); shared_data = false; if( ! data ) if( cudaMalloc( ( void** ) &data, size * sizeof( T ) ) != cudaSuccess ) { cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl; data = NULL; size = 0; return false; } Loading Loading @@ -124,7 +124,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject bool copyFrom( const tnlLongVector< T >& long_vector ) { assert( long_vector. GetSize() == GetSize() ); cudaMemcpy( data, long_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyHostToDevice ); if( cudaMemcpy( data, long_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyHostToDevice ) != cudaSuccess ) { cerr << "Transfer of data from CUDA host ( " << long_vector. GetName() << " ) to CUDA device ( " << GetName() << " ) failed." << endl; return false; } return true; } Loading @@ -148,7 +153,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject template< class T > bool tnlLongVector< T > :: copyFrom( const tnlLongVectorCUDA< T >& cuda_vector ) { assert( cuda_vector. GetSize() == GetSize() ); cudaMemcpy( data, cuda_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyDeviceToHost ); if( cudaMemcpy( data, cuda_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyDeviceToHost ) != cudaSuccess ) { cerr << "Transfer of data from CUDA device ( " << cuda_vector. GetName() << " ) to CUDA host ( " << GetName() << " ) failed." << endl; return false; } return true; } Loading
src/core/tnlLongVectorCUDATester.cu +1 −1 Original line number Diff line number Diff line Loading @@ -7,6 +7,6 @@ void testKernelStarter( const float& number, const int size ) void testKernelStarter( const double& number, const int size ) { testKernel( number, size ); // testKernel( number, size ); }