diff --git a/src/core/tnlCuda.h b/src/core/tnlCuda.h index 6d3dd31b0fabd01b45bde8d616f16cf8edd139d1..f0de5888720137511441ad30fd65a6a892f6871d 100644 --- a/src/core/tnlCuda.h +++ b/src/core/tnlCuda.h @@ -79,9 +79,12 @@ class tnlCuda abort(); } - template< typename Element, typename Index, typename Device > - static bool memcpy( Element* destination, - const Element* source, + template< typename DestinationElement, + typename SourceElement, + typename Index, + typename Device > + static bool memcpy( DestinationElement* destination, + const SourceElement* source, const Index size ) { switch( Device :: getDevice() ) @@ -94,6 +97,18 @@ class tnlCuda return true; } + + template< typename Element, typename Index, typename Device > + static bool memcpy( Element* destination, + const Element* source, + const Index size ) + { + return tnlCuda :: memcpy< Element, Element, Index, Device > + ( destination, + source, + size ); + } + template< typename Element, typename Index, typename Device > static bool memcmp( const Element* data1, const Element* data2, diff --git a/src/core/tnlHost.h b/src/core/tnlHost.h index d2edb0ca7f09d83e1c6b12d7af6c486b264b30e0..93f7ef84c745f4245ca1a631bb7f5dccc22e6f0e 100644 --- a/src/core/tnlHost.h +++ b/src/core/tnlHost.h @@ -49,6 +49,11 @@ class tnlHost template< typename Element, typename Index > static const Element& getArrayElementReference(const Element* data, const Index i ); + template< typename DestinationElement, typename SourceElement, typename Index, typename Device > + static bool memcpy( DestinationElement* destination, + const SourceElement* source, + const Index size ); + template< typename Element, typename Index, typename Device > static bool memcpy( Element* destination, const Element* source, @@ -56,8 +61,8 @@ class tnlHost template< typename Element, typename Index, typename Device > static bool memcmp( const Element* data1, - const Element* data2, - const Index size ); + const Element* data2, + const Index size ); template< typename Element, typename Index > static bool memset( Element* destination, diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h index 20f72e1435d3611320d24e263d340f6eb74828eb..cfc3fe2a4120720b535150b10184ff9753f719af 100644 --- a/src/implementation/core/memory-operations.h +++ b/src/implementation/core/memory-operations.h @@ -23,6 +23,7 @@ #include <core/cuda/reduction-operations.h> #include <core/mfuncs.h> #include <tnlConfig.h> +#include <string.h> const int tnlGPUvsCPUTransferBufferSize( 1 << 20 ); @@ -115,16 +116,26 @@ bool setMemoryCuda( Element* data, } +template< typename DestinationElement, typename SourceElement, typename Index > +bool copyMemoryHostToHost( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + for( Index i = 0; i < size; i ++ ) + destination[ i ] = ( DestinationElement) source[ i ]; + return true; +} + template< typename Element, typename Index > bool copyMemoryHostToHost( Element* destination, const Element* source, const Index size ) { - for( Index i = 0; i < size; i ++ ) - destination[ i ] = source[ i ]; + memcpy( destination, source, size * sizeof( Element ) ); return true; } + template< typename Element, typename Index > bool copyMemoryHostToCuda( Element* destination, const Element* source, @@ -170,6 +181,32 @@ bool copyMemoryCudaToHost( Element* destination, #endif } +template< typename DestinationElement, + typename SourceElement, + typename Index > +bool copyMemoryCudaToHost( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ +#ifdef HAVE_CUDA + abort(); // TODO: fix this + cudaMemcpy( destination, + source, + size * sizeof( Element ), + cudaMemcpyDeviceToHost ); + if( ! checkCudaDevice ) + { + cerr << "Transfer of data from CUDA device to host failed." << endl; + return false; + } + return true; +#else + cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; + return false; +#endif +} + + template< typename Element, typename Index > bool copyMemoryCudaToCuda( Element* destination, const Element* source, diff --git a/src/implementation/core/tnlArray_impl.h b/src/implementation/core/tnlArray_impl.h index 466ec12e33aca5cec16c6f3b539cf628e97349e8..c96ad724d15dff80dba00745bbedb3cfcaa6be46 100644 --- a/src/implementation/core/tnlArray_impl.h +++ b/src/implementation/core/tnlArray_impl.h @@ -213,9 +213,10 @@ tnlArray< Element, Device, Index >& << "Source size: " << array. getSize() << endl << "Target name: " << this -> getName() << endl << "Target size: " << this -> getSize() << endl ); - Device :: template memcpy< typename Array :: ElementType, - typename Array :: IndexType, - typename Array :: DeviceType > + Device :: template memcpy< Element, + typename Array :: ElementType, + typename Array :: IndexType, + typename Array :: DeviceType > ( this -> getData(), array. getData(), array. getSize() ); diff --git a/src/implementation/core/tnlHost_impl.cpp b/src/implementation/core/tnlHost_impl.cpp index 7408bdcad56b2b2633ab474cbcb2a418d7d4f3e7..77ab56b2ad75fb97ea8555d60f3e6b1bda19b5cb 100644 --- a/src/implementation/core/tnlHost_impl.cpp +++ b/src/implementation/core/tnlHost_impl.cpp @@ -80,30 +80,30 @@ template const float& tnlHost :: getArrayElementReference< float, lo template const double& tnlHost :: getArrayElementReference< double, long int >( const double* data, const long int i ); template const long double& tnlHost :: getArrayElementReference< long double, long int >( const long double* data, const long int i ); -template bool tnlHost :: memcpy< char, int, tnlHost >( char* destination, const char* source, const int size ); -template bool tnlHost :: memcpy< int, int, tnlHost >( int* destination, const int* source, const int size ); -template bool tnlHost :: memcpy< long int, int, tnlHost >( long int* destination, const long int* source, const int size ); -template bool tnlHost :: memcpy< float, int, tnlHost >( float* destination, const float* source, const int size ); -template bool tnlHost :: memcpy< double, int, tnlHost >( double* destination, const double* source, const int size ); -template bool tnlHost :: memcpy< long double, int, tnlHost >( long double* destination, const long double* source, const int size ); -template bool tnlHost :: memcpy< char, long int, tnlHost >( char* destination, const char* source, const long int size ); -template bool tnlHost :: memcpy< int, long int, tnlHost >( int* destination, const int* source, const long int size ); -template bool tnlHost :: memcpy< long int, long int, tnlHost >( long int* destination, const long int* source, const long int size ); -template bool tnlHost :: memcpy< float, long int, tnlHost >( float* destination, const float* source, const long int size ); -template bool tnlHost :: memcpy< double, long int, tnlHost >( double* destination, const double* source, const long int size ); -template bool tnlHost :: memcpy< long double, long int, tnlHost >( long double* destination, const long double* source, const long int size ); -template bool tnlHost :: memcpy< char, int, tnlCuda >( char* destination, const char* source, const int size ); -template bool tnlHost :: memcpy< int, int, tnlCuda >( int* destination, const int* source, const int size ); -template bool tnlHost :: memcpy< long int, int, tnlCuda >( long int* destination, const long int* source, const int size ); -template bool tnlHost :: memcpy< float, int, tnlCuda >( float* destination, const float* source, const int size ); -template bool tnlHost :: memcpy< double, int, tnlCuda >( double* destination, const double* source, const int size ); -template bool tnlHost :: memcpy< long double, int, tnlCuda >( long double* destination, const long double* source, const int size ); -template bool tnlHost :: memcpy< char, long int, tnlCuda >( char* destination, const char* source, const long int size ); -template bool tnlHost :: memcpy< int, long int, tnlCuda >( int* destination, const int* source, const long int size ); -template bool tnlHost :: memcpy< long int, long int, tnlCuda >( long int* destination, const long int* source, const long int size ); -template bool tnlHost :: memcpy< float, long int, tnlCuda >( float* destination, const float* source, const long int size ); -template bool tnlHost :: memcpy< double, long int, tnlCuda >( double* destination, const double* source, const long int size ); -template bool tnlHost :: memcpy< long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size ); +template bool tnlHost :: memcpy< char, char, int, tnlHost >( char* destination, const char* source, const int size ); +template bool tnlHost :: memcpy< int, int, int, tnlHost >( int* destination, const int* source, const int size ); +template bool tnlHost :: memcpy< long int, long int, int, tnlHost >( long int* destination, const long int* source, const int size ); +template bool tnlHost :: memcpy< float, float, int, tnlHost >( float* destination, const float* source, const int size ); +template bool tnlHost :: memcpy< double, double, int, tnlHost >( double* destination, const double* source, const int size ); +template bool tnlHost :: memcpy< long double, long double, int, tnlHost >( long double* destination, const long double* source, const int size ); +template bool tnlHost :: memcpy< char, char, long int, tnlHost >( char* destination, const char* source, const long int size ); +template bool tnlHost :: memcpy< int, int, long int, tnlHost >( int* destination, const int* source, const long int size ); +template bool tnlHost :: memcpy< long int, long int, long int, tnlHost >( long int* destination, const long int* source, const long int size ); +template bool tnlHost :: memcpy< float, float, long int, tnlHost >( float* destination, const float* source, const long int size ); +template bool tnlHost :: memcpy< double, double, long int, tnlHost >( double* destination, const double* source, const long int size ); +template bool tnlHost :: memcpy< long double, long double, long int, tnlHost >( long double* destination, const long double* source, const long int size ); +template bool tnlHost :: memcpy< char, char, int, tnlCuda >( char* destination, const char* source, const int size ); +template bool tnlHost :: memcpy< int, int, int, tnlCuda >( int* destination, const int* source, const int size ); +template bool tnlHost :: memcpy< long int, long int, int, tnlCuda >( long int* destination, const long int* source, const int size ); +template bool tnlHost :: memcpy< float, float, int, tnlCuda >( float* destination, const float* source, const int size ); +template bool tnlHost :: memcpy< double, double, int, tnlCuda >( double* destination, const double* source, const int size ); +template bool tnlHost :: memcpy< long double, long double, int, tnlCuda >( long double* destination, const long double* source, const int size ); +template bool tnlHost :: memcpy< char, char, long int, tnlCuda >( char* destination, const char* source, const long int size ); +template bool tnlHost :: memcpy< int, int, long int, tnlCuda >( int* destination, const int* source, const long int size ); +template bool tnlHost :: memcpy< long int, long int, long int, tnlCuda >( long int* destination, const long int* source, const long int size ); +template bool tnlHost :: memcpy< float, float, long int, tnlCuda >( float* destination, const float* source, const long int size ); +template bool tnlHost :: memcpy< double, double, long int, tnlCuda >( double* destination, const double* source, const long int size ); +template bool tnlHost :: memcpy< long double, long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size ); template bool tnlHost :: memcmp< char, int, tnlHost >( const char* data1, const char* data2, const int size ); template bool tnlHost :: memcmp< int, int, tnlHost >( const int* data1, const int* data2, const int size ); diff --git a/src/implementation/core/tnlHost_impl.h b/src/implementation/core/tnlHost_impl.h index c3099066204c41ef596a25e3d31a5ecba43957c1..7be68418601c49380e15ba4901cdea0307360132 100644 --- a/src/implementation/core/tnlHost_impl.h +++ b/src/implementation/core/tnlHost_impl.h @@ -67,10 +67,10 @@ const Element& tnlHost :: getArrayElementReference(const Element* data, const In return data[ i ]; }; -template< typename Element, typename Index, typename Device > -bool tnlHost :: memcpy( Element* destination, - const Element* source, - const Index size ) +template< typename DestinationElement, typename SourceElement, typename Index, typename Device > +bool tnlHost :: memcpy( DestinationElement* destination, + const SourceElement* source, + const Index size ) { switch( Device :: getDevice() ) { @@ -82,6 +82,17 @@ bool tnlHost :: memcpy( Element* destination, return true; }; +template< typename Element, typename Index, typename Device > +bool tnlHost :: memcpy( Element* destination, + const Element* source, + const Index size ) +{ + return tnlHost :: memcpy< Element, Element, Index, Device > + ( destination, + source, + size ); +}; + template< typename Element, typename Index, typename Device > bool tnlHost :: memcmp( const Element* data1, const Element* data2, @@ -168,30 +179,30 @@ extern template const float& tnlHost :: getArrayElementReference< float, extern template const double& tnlHost :: getArrayElementReference< double, long int >( const double* data, const long int i ); extern template const long double& tnlHost :: getArrayElementReference< long double, long int >( const long double* data, const long int i ); -extern template bool tnlHost :: memcpy< char, int, tnlHost >( char* destination, const char* source, const int size ); -extern template bool tnlHost :: memcpy< int, int, tnlHost >( int* destination, const int* source, const int size ); -extern template bool tnlHost :: memcpy< long int, int, tnlHost >( long int* destination, const long int* source, const int size ); -extern template bool tnlHost :: memcpy< float, int, tnlHost >( float* destination, const float* source, const int size ); -extern template bool tnlHost :: memcpy< double, int, tnlHost >( double* destination, const double* source, const int size ); -extern template bool tnlHost :: memcpy< long double, int, tnlHost >( long double* destination, const long double* source, const int size ); -extern template bool tnlHost :: memcpy< char, long int, tnlHost >( char* destination, const char* source, const long int size ); -extern template bool tnlHost :: memcpy< int, long int, tnlHost >( int* destination, const int* source, const long int size ); -extern template bool tnlHost :: memcpy< long int, long int, tnlHost >( long int* destination, const long int* source, const long int size ); -extern template bool tnlHost :: memcpy< float, long int, tnlHost >( float* destination, const float* source, const long int size ); -extern template bool tnlHost :: memcpy< double, long int, tnlHost >( double* destination, const double* source, const long int size ); -extern template bool tnlHost :: memcpy< long double, long int, tnlHost >( long double* destination, const long double* source, const long int size ); -extern template bool tnlHost :: memcpy< char, int, tnlCuda >( char* destination, const char* source, const int size ); -extern template bool tnlHost :: memcpy< int, int, tnlCuda >( int* destination, const int* source, const int size ); -extern template bool tnlHost :: memcpy< long int, int, tnlCuda >( long int* destination, const long int* source, const int size ); -extern template bool tnlHost :: memcpy< float, int, tnlCuda >( float* destination, const float* source, const int size ); -extern template bool tnlHost :: memcpy< double, int, tnlCuda >( double* destination, const double* source, const int size ); -extern template bool tnlHost :: memcpy< long double, int, tnlCuda >( long double* destination, const long double* source, const int size ); -extern template bool tnlHost :: memcpy< char, long int, tnlCuda >( char* destination, const char* source, const long int size ); -extern template bool tnlHost :: memcpy< int, long int, tnlCuda >( int* destination, const int* source, const long int size ); -extern template bool tnlHost :: memcpy< long int, long int, tnlCuda >( long int* destination, const long int* source, const long int size ); -extern template bool tnlHost :: memcpy< float, long int, tnlCuda >( float* destination, const float* source, const long int size ); -extern template bool tnlHost :: memcpy< double, long int, tnlCuda >( double* destination, const double* source, const long int size ); -extern template bool tnlHost :: memcpy< long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size ); +extern template bool tnlHost :: memcpy< char, char, int, tnlHost >( char* destination, const char* source, const int size ); +extern template bool tnlHost :: memcpy< int, int, int, tnlHost >( int* destination, const int* source, const int size ); +extern template bool tnlHost :: memcpy< long int, long int, int, tnlHost >( long int* destination, const long int* source, const int size ); +extern template bool tnlHost :: memcpy< float, float, int, tnlHost >( float* destination, const float* source, const int size ); +extern template bool tnlHost :: memcpy< double, double, int, tnlHost >( double* destination, const double* source, const int size ); +extern template bool tnlHost :: memcpy< long double, long double, int, tnlHost >( long double* destination, const long double* source, const int size ); +extern template bool tnlHost :: memcpy< char, char, long int, tnlHost >( char* destination, const char* source, const long int size ); +extern template bool tnlHost :: memcpy< int, int, long int, tnlHost >( int* destination, const int* source, const long int size ); +extern template bool tnlHost :: memcpy< long int, long int, long int, tnlHost >( long int* destination, const long int* source, const long int size ); +extern template bool tnlHost :: memcpy< float, float, long int, tnlHost >( float* destination, const float* source, const long int size ); +extern template bool tnlHost :: memcpy< double, double, long int, tnlHost >( double* destination, const double* source, const long int size ); +extern template bool tnlHost :: memcpy< long double, long double, long int, tnlHost >( long double* destination, const long double* source, const long int size ); +extern template bool tnlHost :: memcpy< char, char, int, tnlCuda >( char* destination, const char* source, const int size ); +extern template bool tnlHost :: memcpy< int, int, int, tnlCuda >( int* destination, const int* source, const int size ); +extern template bool tnlHost :: memcpy< long int, long int, int, tnlCuda >( long int* destination, const long int* source, const int size ); +extern template bool tnlHost :: memcpy< float, float, int, tnlCuda >( float* destination, const float* source, const int size ); +extern template bool tnlHost :: memcpy< double, double, int, tnlCuda >( double* destination, const double* source, const int size ); +extern template bool tnlHost :: memcpy< long double, long double, int, tnlCuda >( long double* destination, const long double* source, const int size ); +extern template bool tnlHost :: memcpy< char, char, long int, tnlCuda >( char* destination, const char* source, const long int size ); +extern template bool tnlHost :: memcpy< int, int, long int, tnlCuda >( int* destination, const int* source, const long int size ); +extern template bool tnlHost :: memcpy< long int, long int, long int, tnlCuda >( long int* destination, const long int* source, const long int size ); +extern template bool tnlHost :: memcpy< float, float, long int, tnlCuda >( float* destination, const float* source, const long int size ); +extern template bool tnlHost :: memcpy< double, double, long int, tnlCuda >( double* destination, const double* source, const long int size ); +extern template bool tnlHost :: memcpy< long double, long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size ); extern template bool tnlHost :: memcmp< char, int, tnlHost >( const char* data1, const char* data2, const int size ); extern template bool tnlHost :: memcmp< int, int, tnlHost >( const int* data1, const int* data2, const int size );