diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
index 472eb414ab61be1b08d23bfa6bab0f946cfc8fe9..a12b9c67fb54aa124ceed26608daf9783c209505 100644
--- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
+++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp
@@ -207,11 +207,7 @@ compareMemory( const Element1* destination,
 
    auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); };
    auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
-   return Reduction< Devices::Cuda >::reduce(
-      size,
-      reduction, //[=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; },
-      fetch, //[=] __cuda_callable__ ( Index i ) { return destination[ i ]; },
-      true );
+   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );
    /*Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
    return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );*/
 
diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
index 21331defe1b5758f73dd9aa05bb47aadd2198599..8fea90f8f46f0012f9c368089d2610278c22eb9c 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
@@ -47,8 +47,8 @@ template< int blockSize,
 __global__ void
 __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor )
 CudaReductionKernel( const Result zero,
-                     const DataFetcher& dataFetcher,
-                     const Reduction& reduction,
+                     const DataFetcher dataFetcher,
+                     const Reduction reduction,
                      const Index size,
                      Result* output )
 {
@@ -94,8 +94,6 @@ CudaReductionKernel( const Result zero,
    __syncthreads();
    //printf( "1: tid %d data %f \n", tid, sdata[ tid ] );
 
-
-   //return;
    /***
     * Perform the parallel reduction.
     */
@@ -127,7 +125,6 @@ CudaReductionKernel( const Result zero,
       //printf( "3: tid %d data %f \n", tid, sdata[ tid ] );
    }
 
-
    /***
    * This runs in one warp so it is synchronized implicitly.
    */
diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h
index 21bfccdea6536c6c4d7292096a7f7dbca60fc3b8..23b8fcd4ebfef2dfd36b75582cd2b14652988abd 100644
--- a/src/UnitTests/Containers/ArrayOperationsTest.h
+++ b/src/UnitTests/Containers/ArrayOperationsTest.h
@@ -445,22 +445,6 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda )
 #include "../GtestMissingError.h"
 int main( int argc, char* argv[] )
 {
-
-   using ValueType = double;
-   int size = 1000;
-   ValueType *hostData, *deviceData, *deviceData2;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));
-
-   return 0;
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();