From 4b41a1ef803bf3a9c2f7b076ac4c0e1b8c8723a3 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Tue, 28 May 2013 14:25:19 +0200
Subject: [PATCH] Fixing copying arrays with different types.

---
 src/core/tnlCuda.h                          | 21 ++++++-
 src/core/tnlHost.h                          |  9 ++-
 src/implementation/core/memory-operations.h | 41 ++++++++++++-
 src/implementation/core/tnlArray_impl.h     |  7 ++-
 src/implementation/core/tnlHost_impl.cpp    | 48 +++++++--------
 src/implementation/core/tnlHost_impl.h      | 67 ++++++++++++---------
 6 files changed, 131 insertions(+), 62 deletions(-)

diff --git a/src/core/tnlCuda.h b/src/core/tnlCuda.h
index 6d3dd31b0f..f0de588872 100644
--- a/src/core/tnlCuda.h
+++ b/src/core/tnlCuda.h
@@ -79,9 +79,12 @@ class tnlCuda
       abort();
    }
 
-   template< typename Element, typename Index, typename Device >
-   static bool memcpy( Element* destination,
-                       const Element* source,
+   template< typename DestinationElement,
+             typename SourceElement,
+             typename Index,
+             typename Device >
+   static bool memcpy( DestinationElement* destination,
+                       const SourceElement* source,
                        const Index size )
    {
       switch( Device :: getDevice() )
@@ -94,6 +97,18 @@ class tnlCuda
       return true;
    }
 
+
+   // Same-type overload for callers that name a single element type. It
+   // forwards to the general memcpy above, passing Element as both the
+   // destination and the source element type, and returns its result.
+   template< typename Element, typename Index, typename Device >
+   static bool memcpy( Element* destination,
+                       const Element* source,
+                       const Index size )
+   {
+      return tnlCuda :: memcpy< Element, Element, Index, Device >( destination, source, size );
+   }
+
    template< typename Element, typename Index, typename Device >
    static bool memcmp( const Element* data1,
                        const Element* data2,
diff --git a/src/core/tnlHost.h b/src/core/tnlHost.h
index d2edb0ca7f..93f7ef84c7 100644
--- a/src/core/tnlHost.h
+++ b/src/core/tnlHost.h
@@ -49,6 +49,11 @@ class tnlHost
    template< typename Element, typename Index >
    static const Element& getArrayElementReference(const Element* data, const Index i );
 
+   // Copy overload whose destination and source element types are independent template parameters.
+   template< typename DestinationElement, typename SourceElement, typename Index, typename Device >
+   static bool memcpy( DestinationElement* destination,
+                       const SourceElement* source, const Index size );
+
    template< typename Element, typename Index, typename Device >
    static bool memcpy( Element* destination,
                        const Element* source,
@@ -56,8 +61,8 @@ class tnlHost
 
    template< typename Element, typename Index, typename Device >
    static bool memcmp( const Element* data1,
-                         const Element* data2,
-                         const Index size );
+                       const Element* data2,
+                       const Index size );
 
    template< typename Element, typename Index >
    static bool memset( Element* destination,
diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 20f72e1435..cfc3fe2a41 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -23,6 +23,7 @@
 #include <core/cuda/reduction-operations.h>
 #include <core/mfuncs.h>
 #include <tnlConfig.h>
+#include <string.h>
 
 const int tnlGPUvsCPUTransferBufferSize( 1 << 20 );
 
@@ -115,16 +116,26 @@ bool setMemoryCuda( Element* data,
 
 }
 
+// Copies size elements between two host arrays, casting every value from
+// SourceElement to DestinationElement on the way. Always returns true.
+template< typename DestinationElement, typename SourceElement, typename Index >
+bool copyMemoryHostToHost( DestinationElement* destination, const SourceElement* source, const Index size )
+{
+   for( Index position = 0; position < size; ++position )
+      destination[ position ] = ( DestinationElement ) source[ position ];
+   return true;
+}
+
 template< typename Element, typename Index >
 bool copyMemoryHostToHost( Element* destination,
                            const Element* source,
                            const Index size )
 {
-   for( Index i = 0; i < size; i ++ )
-      destination[ i ] = source[ i ];
+   memcpy( destination, source, size * sizeof( Element ) ); // NOTE(review): byte-wise copy, no operator= calls - only safe if Element is trivially copyable; confirm callers
    return true;
 }
 
+
 template< typename Element, typename Index >
 bool copyMemoryHostToCuda( Element* destination,
                            const Element* source,
@@ -170,6 +181,32 @@ bool copyMemoryCudaToHost( Element* destination,
 #endif
 }
 
+// Copies size elements from CUDA device memory into a host array, converting
+// each value from SourceElement to DestinationElement on the host. Returns false
+// when checkCudaDevice reports a failure or when CUDA support is not compiled in.
+template< typename DestinationElement, typename SourceElement, typename Index >
+bool copyMemoryCudaToHost( DestinationElement* destination,
+                           const SourceElement* source,
+                           const Index size )
+{
+#ifdef HAVE_CUDA
+   // Element sizes may differ: stage the device data on the host, then convert.
+   SourceElement* buffer = new SourceElement[ size ];
+   cudaMemcpy( buffer, source, size * sizeof( SourceElement ), cudaMemcpyDeviceToHost );
+   const bool transferred = checkCudaDevice;
+   if( ! transferred )
+      cerr << "Transfer of data from CUDA device to host failed." << endl;
+   for( Index i = 0; transferred && i < size; i ++ )
+      destination[ i ] = ( DestinationElement ) buffer[ i ];
+   delete[] buffer;
+   return transferred;
+#else
+   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+   return false;
+#endif
+}
+
+
 template< typename Element, typename Index >
 bool copyMemoryCudaToCuda( Element* destination,
                            const Element* source,
diff --git a/src/implementation/core/tnlArray_impl.h b/src/implementation/core/tnlArray_impl.h
index 466ec12e33..c96ad724d1 100644
--- a/src/implementation/core/tnlArray_impl.h
+++ b/src/implementation/core/tnlArray_impl.h
@@ -213,9 +213,10 @@ tnlArray< Element, Device, Index >&
                 << "Source size: " << array. getSize() << endl
                 << "Target name: " << this -> getName() << endl
                 << "Target size: " << this -> getSize() << endl );
-   Device :: template memcpy< typename Array :: ElementType,
-                               typename Array :: IndexType,
-                               typename Array :: DeviceType >
+   Device :: template memcpy< Element,
+                              typename Array :: ElementType,
+                              typename Array :: IndexType,
+                              typename Array :: DeviceType >
                              ( this -> getData(),
                                array. getData(),
                                array. getSize() );
diff --git a/src/implementation/core/tnlHost_impl.cpp b/src/implementation/core/tnlHost_impl.cpp
index 7408bdcad5..77ab56b2ad 100644
--- a/src/implementation/core/tnlHost_impl.cpp
+++ b/src/implementation/core/tnlHost_impl.cpp
@@ -80,30 +80,30 @@ template const float&       tnlHost :: getArrayElementReference< float,       lo
 template const double&      tnlHost :: getArrayElementReference< double,      long int >( const double* data, const long int i );
 template const long double& tnlHost :: getArrayElementReference< long double, long int >( const long double* data, const long int i );
 
-template bool tnlHost :: memcpy< char,        int, tnlHost >( char* destination, const char* source, const int size );
-template bool tnlHost :: memcpy< int,         int, tnlHost >( int* destination, const int* source, const int size );
-template bool tnlHost :: memcpy< long int,    int, tnlHost >( long int* destination, const long int* source, const int size );
-template bool tnlHost :: memcpy< float,       int, tnlHost >( float* destination, const float* source, const int size );
-template bool tnlHost :: memcpy< double,      int, tnlHost >( double* destination, const double* source, const int size );
-template bool tnlHost :: memcpy< long double, int, tnlHost >( long double* destination, const long double* source, const int size );
-template bool tnlHost :: memcpy< char,        long int, tnlHost >( char* destination, const char* source, const long int size );
-template bool tnlHost :: memcpy< int,         long int, tnlHost >( int* destination, const int* source, const long int size );
-template bool tnlHost :: memcpy< long int,    long int, tnlHost >( long int* destination, const long int* source, const long int size );
-template bool tnlHost :: memcpy< float,       long int, tnlHost >( float* destination, const float* source, const long int size );
-template bool tnlHost :: memcpy< double,      long int, tnlHost >( double* destination, const double* source, const long int size );
-template bool tnlHost :: memcpy< long double, long int, tnlHost >( long double* destination, const long double* source, const long int size );
-template bool tnlHost :: memcpy< char,        int, tnlCuda >( char* destination, const char* source, const int size );
-template bool tnlHost :: memcpy< int,         int, tnlCuda >( int* destination, const int* source, const int size );
-template bool tnlHost :: memcpy< long int,    int, tnlCuda >( long int* destination, const long int* source, const int size );
-template bool tnlHost :: memcpy< float,       int, tnlCuda >( float* destination, const float* source, const int size );
-template bool tnlHost :: memcpy< double,      int, tnlCuda >( double* destination, const double* source, const int size );
-template bool tnlHost :: memcpy< long double, int, tnlCuda >( long double* destination, const long double* source, const int size );
-template bool tnlHost :: memcpy< char,        long int, tnlCuda >( char* destination, const char* source, const long int size );
-template bool tnlHost :: memcpy< int,         long int, tnlCuda >( int* destination, const int* source, const long int size );
-template bool tnlHost :: memcpy< long int,    long int, tnlCuda >( long int* destination, const long int* source, const long int size );
-template bool tnlHost :: memcpy< float,       long int, tnlCuda >( float* destination, const float* source, const long int size );
-template bool tnlHost :: memcpy< double,      long int, tnlCuda >( double* destination, const double* source, const long int size );
-template bool tnlHost :: memcpy< long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size );
+template bool tnlHost :: memcpy< char,        char,        int, tnlHost >( char* destination, const char* source, const int size );
+template bool tnlHost :: memcpy< int,         int,         int, tnlHost >( int* destination, const int* source, const int size );
+template bool tnlHost :: memcpy< long int,    long int,    int, tnlHost >( long int* destination, const long int* source, const int size );
+template bool tnlHost :: memcpy< float,       float,       int, tnlHost >( float* destination, const float* source, const int size );
+template bool tnlHost :: memcpy< double,      double,      int, tnlHost >( double* destination, const double* source, const int size );
+template bool tnlHost :: memcpy< long double, long double, int, tnlHost >( long double* destination, const long double* source, const int size );
+template bool tnlHost :: memcpy< char,        char,        long int, tnlHost >( char* destination, const char* source, const long int size );
+template bool tnlHost :: memcpy< int,         int,         long int, tnlHost >( int* destination, const int* source, const long int size );
+template bool tnlHost :: memcpy< long int,    long int,    long int, tnlHost >( long int* destination, const long int* source, const long int size );
+template bool tnlHost :: memcpy< float,       float,       long int, tnlHost >( float* destination, const float* source, const long int size );
+template bool tnlHost :: memcpy< double,      double,      long int, tnlHost >( double* destination, const double* source, const long int size );
+template bool tnlHost :: memcpy< long double, long double, long int, tnlHost >( long double* destination, const long double* source, const long int size );
+template bool tnlHost :: memcpy< char,        char,        int, tnlCuda >( char* destination, const char* source, const int size );
+template bool tnlHost :: memcpy< int,         int,         int, tnlCuda >( int* destination, const int* source, const int size );
+template bool tnlHost :: memcpy< long int,    long int,    int, tnlCuda >( long int* destination, const long int* source, const int size );
+template bool tnlHost :: memcpy< float,       float,       int, tnlCuda >( float* destination, const float* source, const int size );
+template bool tnlHost :: memcpy< double,      double,      int, tnlCuda >( double* destination, const double* source, const int size );
+template bool tnlHost :: memcpy< long double, long double, int, tnlCuda >( long double* destination, const long double* source, const int size );
+template bool tnlHost :: memcpy< char,        char,        long int, tnlCuda >( char* destination, const char* source, const long int size );
+template bool tnlHost :: memcpy< int,         int,         long int, tnlCuda >( int* destination, const int* source, const long int size );
+template bool tnlHost :: memcpy< long int,    long int,    long int, tnlCuda >( long int* destination, const long int* source, const long int size );
+template bool tnlHost :: memcpy< float,       float,       long int, tnlCuda >( float* destination, const float* source, const long int size );
+template bool tnlHost :: memcpy< double,      double,      long int, tnlCuda >( double* destination, const double* source, const long int size );
+template bool tnlHost :: memcpy< long double, long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size );
 
 template bool tnlHost :: memcmp< char,        int, tnlHost >( const char* data1, const char* data2, const int size );
 template bool tnlHost :: memcmp< int,         int, tnlHost >( const int* data1, const int* data2, const int size );
diff --git a/src/implementation/core/tnlHost_impl.h b/src/implementation/core/tnlHost_impl.h
index c309906620..7be6841860 100644
--- a/src/implementation/core/tnlHost_impl.h
+++ b/src/implementation/core/tnlHost_impl.h
@@ -67,10 +67,10 @@ const Element& tnlHost :: getArrayElementReference(const Element* data, const In
    return data[ i ];
 };
 
-template< typename Element, typename Index, typename Device >
-bool tnlHost :: memcpy( Element* destination,
-                               const Element* source,
-                               const Index size )
+template< typename DestinationElement, typename SourceElement, typename Index, typename Device >
+bool tnlHost :: memcpy( DestinationElement* destination,
+                        const SourceElement* source,
+                        const Index size )
 {
    switch( Device :: getDevice() )
    {
@@ -82,6 +82,17 @@ bool tnlHost :: memcpy( Element* destination,
    return true;
 };
 
+// Same-type overload for callers that name a single element type. It forwards
+// to the general tnlHost::memcpy, passing Element as both the destination
+// and the source element type, and returns that call's result.
+template< typename Element, typename Index, typename Device >
+bool tnlHost :: memcpy( Element* destination,
+                        const Element* source,
+                        const Index size )
+{
+   return tnlHost :: memcpy< Element, Element, Index, Device >( destination, source, size );
+};
+
 template< typename Element, typename Index, typename Device >
 bool tnlHost :: memcmp( const Element* data1,
                                const Element* data2,
@@ -168,30 +179,30 @@ extern template const float&       tnlHost :: getArrayElementReference< float,
 extern template const double&      tnlHost :: getArrayElementReference< double,      long int >( const double* data, const long int i );
 extern template const long double& tnlHost :: getArrayElementReference< long double, long int >( const long double* data, const long int i );
 
-extern template bool tnlHost :: memcpy< char,        int, tnlHost >( char* destination, const char* source, const int size );
-extern template bool tnlHost :: memcpy< int,         int, tnlHost >( int* destination, const int* source, const int size );
-extern template bool tnlHost :: memcpy< long int,    int, tnlHost >( long int* destination, const long int* source, const int size );
-extern template bool tnlHost :: memcpy< float,       int, tnlHost >( float* destination, const float* source, const int size );
-extern template bool tnlHost :: memcpy< double,      int, tnlHost >( double* destination, const double* source, const int size );
-extern template bool tnlHost :: memcpy< long double, int, tnlHost >( long double* destination, const long double* source, const int size );
-extern template bool tnlHost :: memcpy< char,        long int, tnlHost >( char* destination, const char* source, const long int size );
-extern template bool tnlHost :: memcpy< int,         long int, tnlHost >( int* destination, const int* source, const long int size );
-extern template bool tnlHost :: memcpy< long int,    long int, tnlHost >( long int* destination, const long int* source, const long int size );
-extern template bool tnlHost :: memcpy< float,       long int, tnlHost >( float* destination, const float* source, const long int size );
-extern template bool tnlHost :: memcpy< double,      long int, tnlHost >( double* destination, const double* source, const long int size );
-extern template bool tnlHost :: memcpy< long double, long int, tnlHost >( long double* destination, const long double* source, const long int size );
-extern template bool tnlHost :: memcpy< char,        int, tnlCuda >( char* destination, const char* source, const int size );
-extern template bool tnlHost :: memcpy< int,         int, tnlCuda >( int* destination, const int* source, const int size );
-extern template bool tnlHost :: memcpy< long int,    int, tnlCuda >( long int* destination, const long int* source, const int size );
-extern template bool tnlHost :: memcpy< float,       int, tnlCuda >( float* destination, const float* source, const int size );
-extern template bool tnlHost :: memcpy< double,      int, tnlCuda >( double* destination, const double* source, const int size );
-extern template bool tnlHost :: memcpy< long double, int, tnlCuda >( long double* destination, const long double* source, const int size );
-extern template bool tnlHost :: memcpy< char,        long int, tnlCuda >( char* destination, const char* source, const long int size );
-extern template bool tnlHost :: memcpy< int,         long int, tnlCuda >( int* destination, const int* source, const long int size );
-extern template bool tnlHost :: memcpy< long int,    long int, tnlCuda >( long int* destination, const long int* source, const long int size );
-extern template bool tnlHost :: memcpy< float,       long int, tnlCuda >( float* destination, const float* source, const long int size );
-extern template bool tnlHost :: memcpy< double,      long int, tnlCuda >( double* destination, const double* source, const long int size );
-extern template bool tnlHost :: memcpy< long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size );
+extern template bool tnlHost :: memcpy< char,        char,        int, tnlHost >( char* destination, const char* source, const int size );
+extern template bool tnlHost :: memcpy< int,         int,         int, tnlHost >( int* destination, const int* source, const int size );
+extern template bool tnlHost :: memcpy< long int,    long int,    int, tnlHost >( long int* destination, const long int* source, const int size );
+extern template bool tnlHost :: memcpy< float,       float,       int, tnlHost >( float* destination, const float* source, const int size );
+extern template bool tnlHost :: memcpy< double,      double,      int, tnlHost >( double* destination, const double* source, const int size );
+extern template bool tnlHost :: memcpy< long double, long double, int, tnlHost >( long double* destination, const long double* source, const int size );
+extern template bool tnlHost :: memcpy< char,        char,        long int, tnlHost >( char* destination, const char* source, const long int size );
+extern template bool tnlHost :: memcpy< int,         int,         long int, tnlHost >( int* destination, const int* source, const long int size );
+extern template bool tnlHost :: memcpy< long int,    long int,    long int, tnlHost >( long int* destination, const long int* source, const long int size );
+extern template bool tnlHost :: memcpy< float,       float,       long int, tnlHost >( float* destination, const float* source, const long int size );
+extern template bool tnlHost :: memcpy< double,      double,      long int, tnlHost >( double* destination, const double* source, const long int size );
+extern template bool tnlHost :: memcpy< long double, long double, long int, tnlHost >( long double* destination, const long double* source, const long int size );
+extern template bool tnlHost :: memcpy< char,        char,        int, tnlCuda >( char* destination, const char* source, const int size );
+extern template bool tnlHost :: memcpy< int,         int,         int, tnlCuda >( int* destination, const int* source, const int size );
+extern template bool tnlHost :: memcpy< long int,    long int,    int, tnlCuda >( long int* destination, const long int* source, const int size );
+extern template bool tnlHost :: memcpy< float,       float,       int, tnlCuda >( float* destination, const float* source, const int size );
+extern template bool tnlHost :: memcpy< double,      double,      int, tnlCuda >( double* destination, const double* source, const int size );
+extern template bool tnlHost :: memcpy< long double, long double, int, tnlCuda >( long double* destination, const long double* source, const int size );
+extern template bool tnlHost :: memcpy< char,        char,        long int, tnlCuda >( char* destination, const char* source, const long int size );
+extern template bool tnlHost :: memcpy< int,         int,         long int, tnlCuda >( int* destination, const int* source, const long int size );
+extern template bool tnlHost :: memcpy< long int,    long int,    long int, tnlCuda >( long int* destination, const long int* source, const long int size );
+extern template bool tnlHost :: memcpy< float,       float,       long int, tnlCuda >( float* destination, const float* source, const long int size );
+extern template bool tnlHost :: memcpy< double,      double,      long int, tnlCuda >( double* destination, const double* source, const long int size );
+extern template bool tnlHost :: memcpy< long double, long double, long int, tnlCuda >( long double* destination, const long double* source, const long int size );
 
 extern template bool tnlHost :: memcmp< char,        int, tnlHost >( const char* data1, const char* data2, const int size );
 extern template bool tnlHost :: memcmp< int,         int, tnlHost >( const int* data1, const int* data2, const int size );
-- 
GitLab