src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp  +1 −5

@@ -207,11 +207,7 @@ compareMemory( const Element1* destination,
    auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); };
    auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
-   return Reduction< Devices::Cuda >::reduce( size,
-                                              reduction, //[=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; },
-                                              fetch, //[=] __cuda_callable__ ( Index i ) { return destination[ i ]; },
-                                              true );
-
+   return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );
    /*Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
    return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );*/
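The one-line call above relies on TNL's fetch-and-reduce pattern: fetch maps an index to a per-element value and reduction folds those values together, starting from the identity element true. A minimal serial sketch of that pattern, mirroring only the call shape visible in the diff (the free function reduce and the helper equalArrays below are illustrative, not TNL code):

#include <cstddef>

// Fold fetched values one by one, starting from the reduction's identity element.
template< typename Result, typename Reduction, typename DataFetcher >
Result reduce( std::size_t size, Reduction reduction, DataFetcher fetch, Result identity )
{
   Result result = identity;
   for( std::size_t i = 0; i < size; i++ )
      result = reduction( result, fetch( i ) );
   return result;
}

// Element-wise equality expressed as a reduction, as in compareMemory above.
bool equalArrays( const double* a, const double* b, std::size_t size )
{
   auto fetch = [=]( std::size_t i ) { return a[ i ] == b[ i ]; };
   auto reduction = []( bool x, bool y ) { return x && y; };
   return reduce( size, reduction, fetch, true );
}

Expressing the comparison as a reduction lets the CUDA backend reuse a single parallel-reduction kernel for equality tests, sums, scalar products, and similar operations, instead of keeping a bespoke functor class such as the commented-out ParallelReductionEqualities.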
src/TNL/Containers/Algorithms/CudaReductionKernel.h  +2 −5

@@ -47,8 +47,8 @@ template< int blockSize,
 __global__ void
 __launch_bounds__( Reduction_maxThreadsPerBlock, Reduction_minBlocksPerMultiprocessor )
 CudaReductionKernel( const Result zero,
-                     const DataFetcher& dataFetcher,
-                     const Reduction& reduction,
+                     const DataFetcher dataFetcher,
+                     const Reduction reduction,
                      const Index size,
                      Result* output )
 {

@@ -94,8 +94,6 @@ CudaReductionKernel( const Result zero,
    __syncthreads();

-   //printf( "1: tid %d data %f \n", tid, sdata[ tid ] );
-   //return;
    /***
    * Perform the parallel reduction.
    */

@@ -127,7 +125,6 @@ CudaReductionKernel( const Result zero,
-      //printf( "3: tid %d data %f \n", tid, sdata[ tid ] );
    }

    /***
    * This runs in one warp so it is synchronized implicitly.
    */
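Besides deleting the commented-out debug printf lines, the substantive change is taking dataFetcher and reduction by value instead of by const reference. A __global__ kernel's arguments are copied into device-accessible parameter space at launch, so a reference parameter would point back at host memory, which device code cannot dereference. A self-contained sketch of the by-value rule (the kernel and functor names are illustrative, not TNL's):

#include <cstdio>
#include <cuda_runtime.h>

struct Square
{
   __device__ double operator()( double x ) const { return x * x; }
};

// OK: the functor is copied by value into the kernel's parameter space,
// so device code can call it directly.
template< typename F >
__global__ void applyByValue( F f, const double* in, double* out, int size )
{
   int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < size )
      out[ i ] = f( in[ i ] );
}

// Broken for contrast: `const F& f` would be a reference to host memory.
// template< typename F >
// __global__ void applyByReference( const F& f, const double* in, double* out, int size );

int main()
{
   const int size = 4;
   double *in, *out;
   cudaMallocManaged( &in, size * sizeof( double ) );
   cudaMallocManaged( &out, size * sizeof( double ) );
   for( int i = 0; i < size; i++ )
      in[ i ] = i;
   applyByValue<<< 1, size >>>( Square{}, in, out, size );
   cudaDeviceSynchronize();
   for( int i = 0; i < size; i++ )
      printf( "%g ", out[ i ] );   // prints: 0 1 4 9
   printf( "\n" );
   cudaFree( in );
   cudaFree( out );
   return 0;
}

The same rule covers the __cuda_callable__ lambdas built in ArrayOperationsCuda.hpp: they must reach the kernel by value as well.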
src/UnitTests/Containers/ArrayOperationsTest.h  +0 −16

@@ -445,22 +445,6 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda )
 #include "../GtestMissingError.h"

 int main( int argc, char* argv[] )
 {
-   using ValueType = double;
-   int size = 1000;
-   ValueType *hostData, *deviceData, *deviceData2;
-   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
-   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
-   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size );
-   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size );
-   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
-   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));
-   return 0;
 #ifdef HAVE_GTEST
    ::testing::InitGoogleTest( &argc, argv );
    return RUN_ALL_TESTS();
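The deleted block ran ad-hoc checks straight from main() and returned before the GoogleTest path below it could ever execute; it also used EXPECT_FALSE outside a test body and never freed its buffers. Under GoogleTest the same checks belong in a test case. A hypothetical sketch reusing the identifiers from the deleted code (the test-suite and test names are invented, and freeMemory is assumed to be the counterpart of the allocateMemory seen above):

#ifdef HAVE_GTEST
TEST( ArrayOperationsCompareTest, differentValuesCompareUnequal )
{
   using ValueType = double;
   const int size = 1000;
   ValueType *hostData, *deviceData, *deviceData2;
   ArrayOperations< Devices::Host >::allocateMemory( hostData, size );
   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size );
   ArrayOperations< Devices::Cuda >::allocateMemory( deviceData2, size );
   ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 7, size );
   ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 8, size );
   ArrayOperations< Devices::Cuda >::setMemory( deviceData2, (ValueType) 9, size );

   // Buffers filled with different values must compare unequal.
   EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< ValueType, ValueType >( hostData, deviceData, size ) ));
   EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< ValueType, ValueType >( deviceData, hostData, size ) ));
   EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compareMemory< ValueType, ValueType >( deviceData, deviceData2, size ) ));

   // Release the buffers the ad-hoc version leaked (freeMemory is assumed here).
   ArrayOperations< Devices::Host >::freeMemory( hostData );
   ArrayOperations< Devices::Cuda >::freeMemory( deviceData );
   ArrayOperations< Devices::Cuda >::freeMemory( deviceData2 );
}
#endif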