Commit 91ab8e1a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Improving data transfers between the GPU and the CPU.

parent ff8de504
Loading
Loading
Loading
Loading
+21 −89
Original line number Diff line number Diff line
@@ -27,57 +27,29 @@ class tnlCuda
{
   public:

   static tnlString getDeviceType()
   {
      return tnlString( "tnlCuda" );
   }
   static tnlString getDeviceType();

   static tnlDeviceEnum getDevice()
   {
      return tnlCudaDevice;
   };
   static tnlDeviceEnum getDevice();

   template< typename Element, typename Index >
   static void allocateMemory( Element*& data, const Index size )
   {
      allocateMemoryCuda( data, size );
   }
   static void allocateMemory( Element*& data, const Index size );

   template< typename Element >
   static void freeMemory( Element* data )
   {
      freeMemoryCuda( data );
   }
   static void freeMemory( Element* data );


   template< typename Element >
   static void setMemoryElement( Element* data,
                                 const Element& value )
   {
      setMemoryCuda( data, value, 1 );
   }
                                 const Element& value );

   template< typename Element >
   static Element getMemoryElement( const Element* data )
   {
      Element result;
      copyMemoryCudaToHost( &result, data, 1 );
      return result;
   }
   static Element getMemoryElement( const Element* data );

   template< typename Element, typename Index >
   static Element& getArrayElementReference( Element* data, const Index i )
   {
      tnlAssert( false, );
      abort();
   }
   static Element& getArrayElementReference( Element* data, const Index i );

   template< typename Element, typename Index >
   static const Element& getArrayElementReference(const Element* data, const Index i )
   {
      tnlAssert( false, );
      abort();
   }
   static const Element& getArrayElementReference(const Element* data, const Index i );

   template< typename DestinationElement,
             typename SourceElement,
@@ -85,76 +57,36 @@ class tnlCuda
             typename Device >
   static bool memcpy( DestinationElement* destination,
                       const SourceElement* source,
                       const Index size )
   {
      switch( Device :: getDevice() )
      {
         case tnlHostDevice:
            return copyMemoryHostToCuda( destination, source, size );
         case tnlCudaDevice:
            return copyMemoryCudaToCuda( destination, source, size );
      }
      return true;
   }

                       const Index size );

   template< typename Element, typename Index, typename Device >
   static bool memcpy( Element* destination,
                       const Element* source,
                       const Index size )
   {
      return tnlCuda :: memcpy< Element, Element, Index, Device >
                              ( destination,
                                source,
                                size );
   }
                       const Index size );

   template< typename Element, typename Index, typename Device >
   static bool memcmp( const Element* data1,
                       const Element* data2,
                       const Index size )
   {
      switch( Device :: getDevice() )
      {
         case tnlHostDevice:
            return compareMemoryHostCuda( data2, data1, size );
         case tnlCudaDevice:
            return compareMemoryCuda( data1, data2, size );
      }
   }
                       const Index size );

   template< typename Element, typename Index >
   static bool memset( Element* destination,
                       const Element& value,
                       const Index size )
   {
      return setMemoryCuda( destination, value, size );
   }
                       const Index size );

   static int getMaxGridSize()
   {
      return maxGridSize;
   }
   static int getMaxGridSize();

   static void setMaxGridSize( int newMaxGridSize )
   {
      maxGridSize = newMaxGridSize;
   }
   static void setMaxGridSize( int newMaxGridSize );

   static int getMaxBlockSize()
   {
      return maxBlockSize;
   }
   static int getMaxBlockSize();

   static void setMaxBlockSize( int newMaxBlockSize )
   {
      maxBlockSize = newMaxBlockSize;
   }
   static void setMaxBlockSize( int newMaxBlockSize );

   protected:

   static int maxGridSize, maxBlockSize;
};

#include <implementation/core/tnlCuda_impl.h>

#endif /* TNLCUDA_H_ */
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ ADD_SUBDIRECTORY( cuda )
SET( headers vector-operations.h
             memory-operations.h
             tnlArray_impl.h
             tnlCuda_impl.h
             tnlHost_impl.h
             tnlLogger_impl.h
             tnlMultiArray1D_impl.h
+70 −27
Original line number Diff line number Diff line
@@ -87,11 +87,11 @@ __global__ void setVectorValueCudaKernel( Element* data,
                                          const Element value )
{
   Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
   const Index gridSize = blockDim. x * gridDim. x;
   const Index maxGridSize = blockDim. x * gridDim. x;
   while( elementIdx < size )
   {
      data[ elementIdx ] = value;
      elementIdx += gridSize;
      elementIdx += maxGridSize;
   }
}
#endif
@@ -159,6 +159,42 @@ bool copyMemoryHostToCuda( Element* destination,
#endif
}

template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool copyMemoryHostToCuda( DestinationElement* destination,
                           const SourceElement* source,
                           const Index size )
{
#ifdef HAVE_CUDA
   DestinationElement* buffer = new DestinationElement[ tnlGPUvsCPUTransferBufferSize ];
   if( ! buffer )
   {
      cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << endl;
      return false;
   }
   Index i( 0 );
   while( i < size )
   {
      Index j( 0 );
      while( j < tnlGPUvsCPUTransferBufferSize && i + j < size )
         buffer[ j ] = source[ i + j++ ];
      if( ! copyMemoryHostTuCuda( buffer,
                                  &destination[ i ],
                                  j ) )
      {
         delete[] buffer;
         return false;
      }
      i += j;
   }
   delete[] buffer;
   return true;
#else
   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
   return false;
#endif
}

template< typename Element, typename Index >
bool copyMemoryCudaToHost( Element* destination,
@@ -189,22 +225,29 @@ bool copyMemoryCudaToHost( DestinationElement* destination,
                           const SourceElement* source,
                           const Index size )
{
#ifdef HAVE_CUDA
   abort(); // TODO: fix this
   cudaMemcpy( destination,
               source,
               size * sizeof( SourceElement ),
               cudaMemcpyDeviceToHost );
   if( ! checkCudaDevice )
   SourceElement* buffer = new SourceElement[ tnlGPUvsCPUTransferBufferSize ];
   if( ! buffer )
   {
      cerr << "Transfer of data from CUDA device to host failed." << endl;
      cerr << "Unable to allocate supporting buffer to transfer data between the CUDA device and the host." << endl;
      return false;
   }
   return true;
#else
   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
   Index i( 0 );
   while( i < size )
   {
      if( ! copyMemoryCudaToHost( &source[ i ],
                                  buffer,
                                  Min( size - i, tnlGPUvsCPUTransferBufferSize ) ) )
      {
         delete[] buffer;
         return false;
#endif
      }
      Index j( 0 );
      while( j < tnlGPUvsCPUTransferBufferSize && i + j < size )
         destination[ i + j ] = buffer[ j++ ];
      i += j;
   }
   delete[] buffer;
   return true;
}


@@ -363,19 +406,19 @@ extern template bool setMemoryHost( float* data, const float& value, const long
extern template bool setMemoryHost( double* data, const double& value, const long int size );
extern template bool setMemoryHost( long double* data, const long double& value, const long int size );

extern template bool setMemoryCuda( char* data, const char& value, const int size );
extern template bool setMemoryCuda( int* data, const int& value, const int size );
extern template bool setMemoryCuda( long int* data, const long int& value, const int size );
extern template bool setMemoryCuda( float* data, const float& value, const int size );
extern template bool setMemoryCuda( double* data, const double& value, const int size );
extern template bool setMemoryCuda( long double* data, const long double& value, const int size );

extern template bool setMemoryCuda( char* data, const char& value, const long int size );
extern template bool setMemoryCuda( int* data, const int& value, const long int size );
extern template bool setMemoryCuda( long int* data, const long int& value, const long int size );
extern template bool setMemoryCuda( float* data, const float& value, const long int size );
extern template bool setMemoryCuda( double* data, const double& value, const long int size );
extern template bool setMemoryCuda( long double* data, const long double& value, const long int size );
extern template bool setMemoryCuda( char* data, const char& value, const int size, const int maxGridSize );
extern template bool setMemoryCuda( int* data, const int& value, const int size, const int maxGridSize );
extern template bool setMemoryCuda( long int* data, const long int& value, const int size, const int maxGridSize );
extern template bool setMemoryCuda( float* data, const float& value, const int size, const int maxGridSize );
extern template bool setMemoryCuda( double* data, const double& value, const int size, const int maxGridSize );
extern template bool setMemoryCuda( long double* data, const long double& value, const int size, const int maxGridSize );

extern template bool setMemoryCuda( char* data, const char& value, const long int size, const int maxGridSize );
extern template bool setMemoryCuda( int* data, const int& value, const long int size, const int maxGridSize );
extern template bool setMemoryCuda( long int* data, const long int& value, const long int size, const int maxGridSize );
extern template bool setMemoryCuda( float* data, const float& value, const long int size, const int maxGridSize );
extern template bool setMemoryCuda( double* data, const double& value, const long int size, const int maxGridSize );
extern template bool setMemoryCuda( long double* data, const long double& value, const long int size, const int maxGridSize );

extern template bool copyMemoryHostToHost( char* destination, const char* source, const int size );
extern template bool copyMemoryHostToHost( int* destination, const int* source, const int size );
+14 −14
Original line number Diff line number Diff line
@@ -75,19 +75,19 @@ template bool setMemoryHost( float* data, const float& value, const long int siz
template bool setMemoryHost( double* data, const double& value, const long int size );
template bool setMemoryHost( long double* data, const long double& value, const long int size );

template bool setMemoryCuda( char* data, const char& value, const int size );
template bool setMemoryCuda( int* data, const int& value, const int size );
template bool setMemoryCuda( long int* data, const long int& value, const int size );
template bool setMemoryCuda( float* data, const float& value, const int size );
template bool setMemoryCuda( double* data, const double& value, const int size );
template bool setMemoryCuda( long double* data, const long double& value, const int size );

template bool setMemoryCuda( char* data, const char& value, const long int size );
template bool setMemoryCuda( int* data, const int& value, const long int size );
template bool setMemoryCuda( long int* data, const long int& value, const long int size );
template bool setMemoryCuda( float* data, const float& value, const long int size );
template bool setMemoryCuda( double* data, const double& value, const long int size );
template bool setMemoryCuda( long double* data, const long double& value, const long int size );
template bool setMemoryCuda( char* data, const char& value, const int size, const int maxGridSize );
template bool setMemoryCuda( int* data, const int& value, const int size, const int maxGridSize );
template bool setMemoryCuda( long int* data, const long int& value, const int size, const int maxGridSize );
template bool setMemoryCuda( float* data, const float& value, const int size, const int maxGridSize );
template bool setMemoryCuda( double* data, const double& value, const int size, const int maxGridSize );
template bool setMemoryCuda( long double* data, const long double& value, const int size, const int maxGridSize );

template bool setMemoryCuda( char* data, const char& value, const long int size, const int maxGridSize );
template bool setMemoryCuda( int* data, const int& value, const long int size, const int maxGridSize );
template bool setMemoryCuda( long int* data, const long int& value, const long int size, const int maxGridSize );
template bool setMemoryCuda( float* data, const float& value, const long int size, const int maxGridSize );
template bool setMemoryCuda( double* data, const double& value, const long int size, const int maxGridSize );
template bool setMemoryCuda( long double* data, const long double& value, const long int size, const int maxGridSize );

template bool copyMemoryHostToHost( char* destination, const char* source, const int size );
template bool copyMemoryHostToHost( int* destination, const int* source, const int size );
+146 −0
Original line number Diff line number Diff line
/***************************************************************************
                          tnlCuda_impl.h  -  description
                             -------------------
    begin                : Jul 11, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef TNLCUDA_IMPL_H_
#define TNLCUDA_IMPL_H_

inline tnlString tnlCuda :: getDeviceType()
{
   return tnlString( "tnlCuda" );
}

inline tnlDeviceEnum tnlCuda :: getDevice()
{
   return tnlCudaDevice;
};

template< typename Element, typename Index >
void tnlCuda :: allocateMemory( Element*& data, const Index size )
{
   allocateMemoryCuda( data, size );
}

template< typename Element >
void tnlCuda :: freeMemory( Element* data )
{
   freeMemoryCuda( data );
}


template< typename Element >
void tnlCuda :: setMemoryElement( Element* data,
                                         const Element& value )
{
   setMemoryCuda( data, value, 1, maxGridSize );
}

template< typename Element >
Element tnlCuda :: getMemoryElement( const Element* data )
{
   Element result;
   copyMemoryCudaToHost( &result, data, 1 );
   return result;
}

template< typename Element, typename Index >
Element& tnlCuda :: getArrayElementReference( Element* data, const Index i )
{
   tnlAssert( false, );
   abort();
}

template< typename Element, typename Index >
const Element& tnlCuda :: getArrayElementReference(const Element* data, const Index i )
{
   tnlAssert( false, );
   abort();
}

template< typename DestinationElement,
          typename SourceElement,
          typename Index,
          typename Device >
bool tnlCuda :: memcpy( DestinationElement* destination,
                        const SourceElement* source,
                        const Index size )
{
   switch( Device :: getDevice() )
   {
      case tnlHostDevice:
         return copyMemoryHostToCuda( destination, source, size );
      case tnlCudaDevice:
         return copyMemoryCudaToCuda( destination, source, size );
   }
   return true;
}


template< typename Element, typename Index, typename Device >
bool tnlCuda :: memcpy( Element* destination,
                        const Element* source,
                        const Index size )
{
   return tnlCuda :: memcpy< Element, Element, Index, Device >
                           ( destination,
                             source,
                             size );
}

template< typename Element, typename Index, typename Device >
bool tnlCuda :: memcmp( const Element* data1,
                        const Element* data2,
                        const Index size )
{
   switch( Device :: getDevice() )
   {
      case tnlHostDevice:
         return compareMemoryHostCuda( data2, data1, size );
      case tnlCudaDevice:
         return compareMemoryCuda( data1, data2, size );
   }
}

template< typename Element, typename Index >
bool tnlCuda :: memset( Element* destination,
                        const Element& value,
                        const Index size )
{
   return setMemoryCuda( destination, value, size, maxGridSize );
}

inline int tnlCuda :: getMaxGridSize()
{
   return maxGridSize;
}

inline void tnlCuda :: setMaxGridSize( int newMaxGridSize )
{
   maxGridSize = newMaxGridSize;
}

inline int tnlCuda :: getMaxBlockSize()
{
   return maxBlockSize;
}

inline void tnlCuda :: setMaxBlockSize( int newMaxBlockSize )
{
   maxBlockSize = newMaxBlockSize;
}


#endif /* TNLCUDA_IMPL_H_ */
Loading