From 0ef6688575b2474cda5bec9349810a34390572fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Sun, 14 Apr 2019 10:36:43 +0200
Subject: [PATCH] Parallel reduction is working.

---
 .../Algorithms/ArrayOperationsCuda.hpp           |  6 +-----
 .../Containers/Algorithms/CudaReductionKernel.h  |  7 ++-----
 src/UnitTests/Containers/ArrayOperationsTest.h   | 16 ----------------
 3 files changed, 3 insertions(+), 26 deletions(-)

diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
index 472eb414ab..a12b9c67fb 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
@@ -207,11 +207,7 @@ compareMemory( const Element1* destination,
 
    auto fetch = [=] __cuda_callable__ ( Index i ) { return  ( destination[ i ] == source[ i ] ); };
    auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
-   return Reduction< Devices::Cuda >::reduce(
-      size,
-      reduction, //[=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; },
-      fetch, //[=] __cuda_callable__ ( Index i ) { return  destination[ i ]; },
-      true );
+   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );
 
    /*Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
    return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );*/
diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
index 21331defe1..8fea90f8f4 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
@@ -47,8 +47,8 @@ template< int blockSize,
 __global__ void
 __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor )
 CudaReductionKernel( const Result zero,
-                     const DataFetcher& dataFetcher,
-                     const Reduction& reduction,
+                     const DataFetcher dataFetcher,
+                     const Reduction reduction,
                      const Index size,
                      Result* output )
 {
@@ -94,8 +94,6 @@ CudaReductionKernel( const Result zero,
    __syncthreads();
 
    //printf( "1: tid %d data %f \n", tid, sdata[ tid ] );
-
-   //return;
    /***
     *  Perform the parallel reduction.
     */
@@ -127,7 +125,6 @@ CudaReductionKernel( const Result zero,
       //printf( "3: tid %d data %f \n", tid, sdata[ tid ] );
    }
 
-
    /***
     * This runs in one warp so it is synchronized implicitly.
     */
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index 21bfccdea6..23b8fcd4eb 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -445,22 +445,6 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda )
 #include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
-
-   using ValueType = double;
-   int size = 1000;
-   ValueType *hostData, *deviceData, *deviceData2;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));
-
-   return 0;
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
-- 
GitLab