Commit 3b4ed37c authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Debugging CUDA kernels.

Clean up makefiles from CXXFLAGS.
parent 04c68239
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@ then
   STORE_CPPFLAGS=$CPPFLAGS
   STORE_LDFLAGS=$LDFLAGS
   CUDA_LDFLAGS="-L$CUDA_LIBS"
   CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA"
   CUDA_CXXFLAGS="-I$CUDA_HEADERS -DHAVE_CUDA -arch sm_13"
   CXXFLAGS="$CXXFLAGS -I$CUDA_HEADERS"
   CPPFLAGS="$CPPFLAGS -I$CUDA_HEADERS"
   LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"
+1 −1
Original line number Diff line number Diff line
@@ -47,7 +47,7 @@ libtnl_mpi_dbg_0_1_la_LIBADD = debug/libtnldebug-mpi-dbg-0.1.la \
                       matrix/libtnlmatrix-mpi-dbg-0.1.la
endif                       

tnl_unit_tests_sources = tnl-unit-tests.cpp
tnl_unit_tests_sources = tnl-unit-tests.cu

check_PROGRAMS = tnl-unit-tests
tnl_unit_tests_SOURCES = $(tnl_unit_tests_sources)
+1 −0
Original line number Diff line number Diff line
@@ -146,6 +146,7 @@ template< class T > class tnlLongVector : public tnlObject
   {
      assert( long_vector. GetSize() == GetSize() );
      memcpy( data, long_vector. Data(), GetSize() * sizeof( T ) );
      return true;
   };

#ifdef HAVE_CUDA
+19 −9
Original line number Diff line number Diff line
@@ -35,10 +35,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject
   tnlLongVectorCUDA( int _size = 0 )
    : size( _size ), shared_data( false )
   {
      cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) );
      if( ! data )
      if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess  )
      {
         cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl;
         data = NULL;
         abort();
      }
      //data ++;
@@ -48,10 +48,10 @@ template< class T > class tnlLongVectorCUDA : public tnlObject
   tnlLongVectorCUDA( const tnlLongVectorCUDA& v )
    : tnlObject( v ), size( v. size ), shared_data( false )
   {
      cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) );
      if( ! data )
      if( cudaMalloc( ( void** ) &data, ( size + 1 ) * sizeof( T ) ) != cudaSuccess )
      {
         cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl;
         data = NULL;
         abort();
      }
      //data ++;
@@ -69,14 +69,14 @@ template< class T > class tnlLongVectorCUDA : public tnlObject
      if( ! shared_data )
      {
         cudaFree( data );
         data = 0;
         data = NULL;
      }
      size = _size;
      cudaMalloc( ( void** ) &data, size * sizeof( T ) );
      shared_data = false;
      if( ! data )
      if( cudaMalloc( ( void** ) &data, size * sizeof( T ) ) != cudaSuccess )
      {
         cerr << "Unable to allocate new long vector with size " << size << " on CUDA device." << endl;
         data = NULL;
         size = 0;
         return false;
      }
@@ -124,7 +124,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject
   // Copies the contents of long_vector into this CUDA vector.
   //
   // The two vectors must have equal sizes (enforced by assert). Exactly one
   // cudaMemcpy of GetSize() * sizeof( T ) bytes is issued in the
   // cudaMemcpyHostToDevice direction, with long_vector. Data() as the source
   // and this object's data pointer as the destination.
   //
   // Returns true when cudaMemcpy reports cudaSuccess. Otherwise a message
   // naming both vectors is written to cerr and false is returned.
   bool copyFrom( const tnlLongVector< T >& long_vector )
   {
      assert( long_vector. GetSize() == GetSize() );
      // The transfer is performed once and its status is always inspected;
      // an additional unchecked cudaMemcpy would repeat the copy and silently
      // drop its error code.
      if( cudaMemcpy( data, long_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyHostToDevice ) != cudaSuccess )
      {
         cerr << "Transfer of data from CUDA host ( " << long_vector. GetName()
              << " ) to CUDA device ( " << GetName() << " ) failed." << endl;
         return false;
      }
      return true;
   }

@@ -148,7 +153,12 @@ template< class T > class tnlLongVectorCUDA : public tnlObject
// Copies the contents of cuda_vector into this vector.
//
// The two vectors must have equal sizes (enforced by assert). Exactly one
// cudaMemcpy of GetSize() * sizeof( T ) bytes is issued in the
// cudaMemcpyDeviceToHost direction, with cuda_vector. Data() as the source
// and this object's data pointer as the destination.
//
// Returns true when cudaMemcpy reports cudaSuccess. Otherwise a message
// naming both vectors is written to cerr and false is returned.
template< class T > bool tnlLongVector< T > :: copyFrom( const tnlLongVectorCUDA< T >& cuda_vector )
{
   assert( cuda_vector. GetSize() == GetSize() );
   // The transfer is performed once and its status is always inspected;
   // an additional unchecked cudaMemcpy would repeat the copy and silently
   // drop its error code.
   if( cudaMemcpy( data, cuda_vector. Data(), GetSize() * sizeof( T ), cudaMemcpyDeviceToHost ) != cudaSuccess )
   {
      cerr << "Transfer of data from CUDA device ( " << cuda_vector. GetName()
           << " ) to CUDA host ( " << GetName() << " ) failed." << endl;
      return false;
   }
   return true;
}

+1 −1
Original line number Diff line number Diff line
@@ -7,6 +7,6 @@ void testKernelStarter( const float& number, const int size )

// Overload of testKernelStarter taking a double value: forwards `number`
// and `size` unchanged to testKernel.
// NOTE(review): the commented-out call at the end of the body repeats the
// live call above it; this looks like both sides of a change shown together.
// Confirm whether the call is meant to be enabled or disabled.
void testKernelStarter( const double& number, const int size )
{
   testKernel( number, size );
  // testKernel( number, size );
}
Loading