diff --git a/src/implementation/core/cuda/cuda-reduction_impl.h b/src/implementation/core/cuda/cuda-reduction_impl.h
index 741b613bf139dbaafea0de4ab07537fa4bccadc3..6355790aab7ca262e7f328f2e16c87f6fdefeb68 100644
--- a/src/implementation/core/cuda/cuda-reduction_impl.h
+++ b/src/implementation/core/cuda/cuda-reduction_impl.h
@@ -343,10 +343,8 @@ typename Operation :: IndexType reduceOnCudaDevice( const Operation& operation,
             break;
          case   1:
             tnlAssert( false, cerr << "blockSize should not be 1." << endl );
-            break;
          default:
             tnlAssert( false, cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." );
-            break;
       }
    return gridSize. x;
 }
diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 3dfd2ee83de9d03561de7bd2b5f079c95d8cbfed..aa8b6d15c3d060e036c0d84d89724a6948682589 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -108,13 +108,11 @@ bool setMemoryCuda( Element* data,
    Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
    gridSize. x = Min( blocksNumber, maxGridSize );
    setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
-
    return checkCudaDevice;
 #else
       cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
       return false;
 #endif
-
 }
 
 template< typename DestinationElement, typename SourceElement, typename Index >
@@ -166,7 +164,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size )
 {
-#ifdef HAVE_CUDA
    DestinationElement* buffer = new DestinationElement[ tnlGPUvsCPUTransferBufferSize ];
    if( ! buffer )
    {
@@ -179,8 +176,8 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
       Index j( 0 );
       while( j < tnlGPUvsCPUTransferBufferSize && i + j < size )
          buffer[ j ] = source[ i + j++ ];
-      if( ! copyMemoryHostTuCuda( buffer,
-                                  &destination[ i ],
+      if( ! copyMemoryHostToCuda( &destination[ i ],
+                                  buffer,
                                   j ) )
       {
          delete[] buffer;
@@ -190,10 +187,6 @@ bool copyMemoryHostToCuda( DestinationElement* destination,
    }
    delete[] buffer;
    return true;
-#else
-   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
-   return false;
-#endif
 }
 
 template< typename Element, typename Index >
@@ -234,8 +227,8 @@ bool copyMemoryCudaToHost( DestinationElement* destination,
    Index i( 0 );
    while( i < size )
    {
-      if( ! copyMemoryCudaToHost( &source[ i ],
-                                  buffer,
+      if( ! copyMemoryCudaToHost( buffer,
+                                  &source[ i ],
                                   Min( size - i, tnlGPUvsCPUTransferBufferSize ) ) )
       {
          delete[] buffer;
@@ -268,10 +261,63 @@ bool copyMemoryCudaToCuda( Element* destination,
 #endif
 }
 
-template< typename Element, typename Index >
+#ifdef HAVE_CUDA
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+__global__ void copyMemoryCudaToCudaKernel( DestinationElement* destination,
+                                            const SourceElement* source,
+                                            const Index size )
+{
+   Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
+   const Index maxGridSize = blockDim. x * gridDim. x;
+   while( elementIdx < size )
+   {
+      destination[ elementIdx ] = source[ elementIdx ];
+      elementIdx += maxGridSize;
+   }
+}
+#endif
+
+
+template< typename DestinationElement,
+          typename SourceElement,
+          typename Index >
+bool copyMemoryCudaToCuda( DestinationElement* destination,
+                           const SourceElement* source,
+                           const Index size,
+                           const Index maxGridSize )
+{
+#ifdef HAVE_CUDA
+   dim3 blockSize( 0 ), gridSize( 0 );
+   blockSize. x = 256;
+   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
+   gridSize. x = Min( blocksNumber, maxGridSize );
+   copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
+   return checkCudaDevice;
+#else
+      cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+      return false;
+#endif
+}
+
+template< typename Element,
+          typename Index >
 bool compareMemoryHost( const Element* data1,
                         const Element* data2,
                         const Index size )
+{
+   if( memcmp( data1, data2, size * sizeof( Element ) ) != 0 )
+      return false;
+   return true;
+}
+
+template< typename Element1,
+          typename Element2,
+          typename Index >
+bool compareMemoryHost( const Element1* data1,
+                        const Element2* data2,
+                        const Index size )
 {
    for( Index i = 0; i < size; i ++ )
       if( data1[ i ] != data2[ i ] )
@@ -279,15 +325,15 @@ bool compareMemoryHost( const Element* data1,
    return true;
 }
 
-template< typename Element, typename Index >
-bool compareMemoryHostCuda( const Element* hostData,
-                               const Element* deviceData,
-                               const Index size )
+template< typename Element1,
+          typename Element2,
+          typename Index >
+bool compareMemoryHostCuda( const Element1* hostData,
+                            const Element2* deviceData,
+                            const Index size )
 {
 #ifdef HAVE_CUDA
-   Index host_buffer_size = :: Min( ( Index ) ( tnlGPUvsCPUTransferBufferSize / sizeof( Element ) ),
-                                                size );
-   Element* host_buffer = new Element[ host_buffer_size ];
+   Element2* host_buffer = new Element2[ tnlGPUvsCPUTransferBufferSize ];
    if( ! host_buffer )
    {
       cerr << "I am sorry but I cannot allocate supporting buffer on the host for comparing data between CUDA GPU and CPU." << endl;
@@ -296,10 +342,10 @@ bool compareMemoryHostCuda( const Element* hostData,
    Index compared( 0 );
    while( compared < size )
    {
-      Index transfer = Min( size - compared, host_buffer_size );
+      Index transfer = Min( size - compared, tnlGPUvsCPUTransferBufferSize );
       if( cudaMemcpy( ( void* ) host_buffer,
                       ( void* ) & ( deviceData[ compared ] ),
-                      transfer * sizeof( Element ),
+                      transfer * sizeof( Element2 ),
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
       {
          cerr << "Transfer of data from the device failed." << endl;
@@ -328,7 +374,8 @@ bool compareMemoryHostCuda( const Element* hostData,
 #endif
 }
 
-template< typename Element, typename Index >
+template< typename Element,
+          typename Index >
 bool compareMemoryCuda( const Element* deviceData1,
                         const Element* deviceData2,
                         const Index size )
diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt
index 0f5e338e650c094b88ae986ffed0482935615e4e..80fa599c77340d33cd597f470714c9a71d10e2d5 100755
--- a/tests/unit-tests/CMakeLists.txt
+++ b/tests/unit-tests/CMakeLists.txt
@@ -39,7 +39,7 @@ if( BUILD_CUDA )
    ADD_TEST( core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnl-memory-operations-test${mpiExt}${debugExt} )
    SET_TESTS_PROPERTIES ( core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-device-check-test${mpiExt}${debugExt} )
    ADD_TEST( core/cuda/tnl-reduction-test${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnl-reduction-test${mpiExt}${debugExt} )
-   SET_TESTS_PROPERTIES ( core/cuda/tnl-reduction-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} )
+   SET_TESTS_PROPERTIES ( core/cuda/tnl-reduction-test${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-memory-operations-test${mpiExt}${debugExt} )   
    ADD_TEST( core/cuda/tnlCudaVectorOperationsTest${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlCudaVectorOperationsTest${mpiExt}${debugExt} )
    SET_TESTS_PROPERTIES ( core/cuda/tnlCudaVectorOperationsTest${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnl-reduction-test${mpiExt}${debugExt} )   
       
diff --git a/tests/unit-tests/core/CMakeLists.txt b/tests/unit-tests/core/CMakeLists.txt
index ab23f61b0aaa1c8635748e5d930a8e92758ffded..dbad7047b16293927d2ec5242ce18ee4e372ee57 100755
--- a/tests/unit-tests/core/CMakeLists.txt
+++ b/tests/unit-tests/core/CMakeLists.txt
@@ -19,17 +19,5 @@ TARGET_LINK_LIBRARIES( tnlArrayTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                            tnl${mpiExt}${debugExt}-0.1 )
              
 if( BUILD_CUDA )
-#    CUDA_ADD_EXECUTABLE( tnl-device-check-test${mpiExt}${debugExt} ${headers} device-check-test.cu )
-#    TARGET_LINK_LIBRARIES( tnl-device-check-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
-#                                                              tnl${mpiExt}${debugExt}-0.1 )
-#
-#    CUDA_ADD_EXECUTABLE( tnl-memory-operations-test${mpiExt}${debugExt} ${headers} memory-operations-test.cu )
-#    TARGET_LINK_LIBRARIES( tnl-memory-operations-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
-#                                                              tnl${mpiExt}${debugExt}-0.1 )
-#    
-#    CUDA_ADD_EXECUTABLE( tnl-reduction-test${mpiExt}${debugExt} ${headers} reduction-test.cu )
-#    TARGET_LINK_LIBRARIES( tnl-reduction-test${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
-#                                                              tnl${mpiExt}${debugExt}-0.1 )                                                                
-#                                                                                                                                                                                                                                                            
 endif()
                                                                        
\ No newline at end of file
diff --git a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
index 6be0197eb1996c4f3475a65acac13f9539c4ac15..57522c234eecf1488f9b3f9918e328230adee4ad 100644
--- a/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
+++ b/tests/unit-tests/core/cuda/tnlCudaMemoryOperationsTester.h
@@ -46,10 +46,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                 "allocationTest",
                                 &tnlCudaMemoryOperationsTester :: allocationTest )
                                );
-      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
-                                "copyTest",
-                                &tnlCudaMemoryOperationsTester :: allocationTest )
-                               );
       suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
                                 "smallMemorySetTest",
                                 &tnlCudaMemoryOperationsTester :: smallMemorySetTest )
@@ -58,6 +54,30 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
                                 "bigMemorySetTest",
                                 &tnlCudaMemoryOperationsTester :: bigMemorySetTest )
                                );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "copyMemoryTest",
+                                &tnlCudaMemoryOperationsTester :: copyMemoryTest )
+                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "copyMemoryWithConversionHostToCudaTest",
+                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionHostToCudaTest )
+                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "copyMemoryWithConversionCudaToHostTest",
+                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToHostTest )
+                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "copyMemoryWithConversionCudaToCudaTest",
+                                &tnlCudaMemoryOperationsTester :: copyMemoryWithConversionCudaToCudaTest )
+                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "compareMemoryHostCudaTest",
+                                &tnlCudaMemoryOperationsTester :: compareMemoryHostCudaTest )
+                               );
+      suiteOfTests -> addTest( new CppUnit :: TestCaller< tnlCudaMemoryOperationsTester >(
+                                "compareMemoryWithConevrsionHostCudaTest",
+                                &tnlCudaMemoryOperationsTester :: compareMemoryWithConversionHostCudaTest )
+                               );
 
       return suiteOfTests;
    };
@@ -72,22 +92,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       CPPUNIT_ASSERT( checkCudaDevice );
    };
 
-   void copyTest()
-   {
-      const int size( 1 << 22 );
-      int *hostData1, *hostData2, *deviceData;
-      allocateMemoryHost( hostData1, size );
-      allocateMemoryHost( hostData2, size );
-      allocateMemoryCuda( deviceData, size );
-      setMemoryHost( hostData1, 13, size );
-      copyMemoryHostToCuda( deviceData, hostData1, size );
-      copyMemoryCudaToHost( hostData2, deviceData, size );
-      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
-      freeMemoryHost( hostData1 );
-      freeMemoryHost( hostData2 );
-      freeMemoryCuda( deviceData );
-   };
-
    void smallMemorySetTest()
    {
       const int size( 1024 );
@@ -108,7 +112,6 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
    void bigMemorySetTest()
    {
       const int size( 1.1 * maxCudaGridSize * maxCudaBlockSize );
-      cout << "Size = " << size << endl;
       int *hostData, *deviceData;
       allocateMemoryHost( hostData, size );
       allocateMemoryCuda( deviceData, size );
@@ -120,13 +123,113 @@ class tnlCudaMemoryOperationsTester : public CppUnit :: TestCase
       for( int i = 0; i < size; i += 100 )
       {
          if( hostData[ i ] != 13 )
-            cout << " i = " << i << " " << hostData[ i ] << endl;
          CPPUNIT_ASSERT( hostData[ i ] == 13 );
       }
       freeMemoryHost( hostData );
       freeMemoryCuda( deviceData );
    };
 
+   void copyMemoryTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData1, *hostData2, *deviceData;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      CPPUNIT_ASSERT( compareMemoryHost( hostData1, hostData2, size) );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+   };
+
+   void copyMemoryWithConversionHostToCudaTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData1;
+      float *hostData2, *deviceData;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      for( int i = 0; i < size; i ++ )
+         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+   };
+
+   void copyMemoryWithConversionCudaToHostTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData1, *deviceData;
+      float *hostData2;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData, hostData1, size );
+      copyMemoryCudaToHost( hostData2, deviceData, size );
+      for( int i = 0; i < size; i ++ )
+         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData );
+   };
+
+   void copyMemoryWithConversionCudaToCudaTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData1, *deviceData1;
+      float *hostData2, *deviceData2;
+      allocateMemoryHost( hostData1, size );
+      allocateMemoryHost( hostData2, size );
+      allocateMemoryCuda( deviceData1, size );
+      allocateMemoryCuda( deviceData2, size );
+      setMemoryHost( hostData1, 13, size );
+      copyMemoryHostToCuda( deviceData1, hostData1, size );
+      copyMemoryCudaToCuda( deviceData2, deviceData1, size, maxCudaGridSize );
+      copyMemoryCudaToHost( hostData2, deviceData2, size );
+      for( int i = 0; i < size; i ++ )
+         CPPUNIT_ASSERT( hostData1[ i ] == hostData2[ i ] );
+      freeMemoryHost( hostData1 );
+      freeMemoryHost( hostData2 );
+      freeMemoryCuda( deviceData1 );
+      freeMemoryCuda( deviceData2 );
+   };
+
+   void compareMemoryHostCudaTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData, *deviceData;
+      allocateMemoryHost( hostData, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData, 7, size );
+      setMemoryCuda( deviceData, 8, size, maxCudaGridSize );
+      CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) );
+      setMemoryCuda( deviceData, 7, size, maxCudaGridSize );
+      CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) );
+   };
+
+   void compareMemoryWithConversionHostCudaTest()
+   {
+      const int size( 1 << 22 );
+      int *hostData;
+      float *deviceData;
+      allocateMemoryHost( hostData, size );
+      allocateMemoryCuda( deviceData, size );
+      setMemoryHost( hostData, 7, size );
+      setMemoryCuda( deviceData, ( float ) 8.0, size, maxCudaGridSize );
+      CPPUNIT_ASSERT( ! compareMemoryHostCuda( hostData, deviceData, size ) );
+      setMemoryCuda( deviceData, ( float ) 7.0, size, maxCudaGridSize );
+      CPPUNIT_ASSERT( compareMemoryHostCuda( hostData, deviceData, size ) );
+   };
+
+
 };
 
 #else
diff --git a/tests/unit-tests/core/tnlArrayTest.cpp b/tests/unit-tests/core/tnlArrayTest.cpp
index 4184e8c35650aa9764442ae0afca73eec4a3a345..2902b47838bfc72f471e690291637aebfdbebd8f 100644
--- a/tests/unit-tests/core/tnlArrayTest.cpp
+++ b/tests/unit-tests/core/tnlArrayTest.cpp
@@ -15,14 +15,17 @@
  *                                                                         *
  ***************************************************************************/
 
+#include <tnlConfig.h>
+#include <core/tnlHost.h>
 #include <cstdlib>
+
 #include "tnlArrayTester.h"
 #include "../tnlUnitTestStarter.h"
 
 int main( int argc, char* argv[] )
 {
 #ifdef HAVE_CPPUNIT
-/*   if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() ||
+   if( ! tnlUnitTestStarter :: run< tnlArrayTester< char, tnlHost, int > >() ||
        ! tnlUnitTestStarter :: run< tnlArrayTester< int, tnlHost, int > >() ||
        ! tnlUnitTestStarter :: run< tnlArrayTester< long int, tnlHost, int > >() ||
        ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, int > >() ||
@@ -34,7 +37,7 @@ int main( int argc, char* argv[] )
        ! tnlUnitTestStarter :: run< tnlArrayTester< float, tnlHost, long int > >() ||
        ! tnlUnitTestStarter :: run< tnlArrayTester< double, tnlHost, long int > >() ||
        ! tnlUnitTestStarter :: run< tnlArrayTester< long double, tnlHost, long int > >() )
-     return EXIT_FAILURE;*/
+     return EXIT_FAILURE;
    return EXIT_SUCCESS;
 #else
    return EXIT_FAILURE;
diff --git a/tests/unit-tests/core/tnlArrayTester.h b/tests/unit-tests/core/tnlArrayTester.h
index 9bb9e8b0177fdf4a466048f88e1db3192056382e..42859c99c0dc96c259e4f36cffcc4b27ccd2c8aa 100644
--- a/tests/unit-tests/core/tnlArrayTester.h
+++ b/tests/unit-tests/core/tnlArrayTester.h
@@ -213,7 +213,9 @@ class tnlArrayTester : public CppUnit :: TestCase
    };
 
 };
-
+#else /* HAVE_CPPUNIT */
+template< typename ElementType, typename Device, typename IndexType >
+class tnlArrayTester{};
 #endif /* HAVE_CPPUNIT */
 
 #endif /* TNLARRAYMANAGERTESTER_H_ */