diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
index 0570f10f459a2a2250132197251f6b96d9697532..472eb414ab61be1b08d23bfa6bab0f946cfc8fe9 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
@@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination,
       copiedElements += copySize;
    }
 }
-
 template< typename Element1,
           typename Element2,
           typename Index >
@@ -206,9 +205,8 @@ compareMemory( const Element1* destination,
    TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
    TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
 
-   Element1* d;
-   cudaMalloc( ( void** ) &d, size * sizeof( Element1 ) );
-   auto fetch = [=] __cuda_callable__ ( Index i ) { return  d[ 0 ]; }; //( destination[ i ] == source[ i ] ); };
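+   // Element-wise check: fetch returns true where destination and source match; the logical-AND reduction collapses this into a single bool.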
+   auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); };
    auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
    return Reduction< Devices::Cuda >::reduce(
       size,
diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
index 685a3a33216b6eb5d5d88f68c55830e129b88d22..21331defe1b5758f73dd9aa05bb47aadd2198599 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
@@ -72,7 +72,8 @@ CudaReductionKernel( const Result zero,
     * Read data into the shared memory. We start with the
     * sequential reduction.
     */
-   /*while( gid + 4 * gridSize < size )
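+   // Unrolled grid-stride pass: each thread sequentially folds several input elements into its shared-memory slot.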
+   while( gid + 4 * gridSize < size )
    {
       sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
       sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) );
@@ -85,14 +86,15 @@
       sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
       sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid + gridSize ) );
       gid += 2 * gridSize;
-   }*/
+   }
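+   // Tail loop: pick up the elements left over by the unrolled passes above, one grid stride at a time.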
    while( gid < size )
    {
-      sdata[ tid ] = dataFetcher( gid ); //reduction( sdata[ tid ], dataFetcher( gid ) );
+      sdata[ tid ] = reduction( sdata[ tid ], dataFetcher( gid ) );
       gid += gridSize;
    }
    __syncthreads();
-   return;
+
    //printf( "1: tid %d data %f \n", tid, sdata[ tid ] );
 
    //return;
diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h
index b5fcabdc774b71c48855d2b881fe93db17495154..0fee746cf092cb7b326c5aff4e77024f4870a53c 100644
--- a/src/TNL/Containers/Algorithms/Reduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Reduction_impl.h
@@ -110,9 +110,9 @@ Reduction< Devices::Cuda >::
        * Transfer the reduced data from device to host.
        */
       //ResultType* resultArray[ reducedSize ];
-      //std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] };
-      ResultType* resultArray = new ResultType[ reducedSize ];
-      ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize );
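+      // Copy the partial results into a host buffer (owned by unique_ptr, so it is released on every return path) and finish the reduction sequentially on the CPU below.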
+      std::unique_ptr< ResultType[] > resultArray{ new ResultType[ reducedSize ] };
+      ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray.get(), deviceAux1, reducedSize );
 
       #ifdef CUDA_REDUCTION_PROFILING
          timer.stop();
@@ -132,7 +132,6 @@ Reduction< Devices::Cuda >::
          std::cout << "   Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
       #endif
       
-      delete[] resultArray;
       return result;
    }
    else {
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index 9a0d24039130a1750bc783a4f84ab958698f9f45..21bfccdea6536c6c4d7292096a7f7dbca60fc3b8 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -303,10 +303,9 @@ TYPED_TEST( ArrayOperationsTest, copyMemoryWithConversions_cuda )
    ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
 }
 
-//TYPED_TEST( ArrayOperationsTest, compareMemory_cuda )
-void Test()
+TYPED_TEST( ArrayOperationsTest, compareMemory_cuda )
 {
-   using ValueType = double;//typename TestFixture::ValueType;
+   using ValueType = typename TestFixture::ValueType;
    const int size = ARRAY_TEST_SIZE;
 
    ValueType *hostData, *deviceData, *deviceData2;
@@ -446,7 +445,6 @@
 #include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
-   Test();
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();