setMemory() and copyMemory() in ArrayOperations no longer report success through a bool return value: every specialization now returns void, and failures are signalled through std::cerr messages, TNL_CHECK_CUDA_DEVICE, or exceptions (Exceptions::CudaSupportMissing, Exceptions::MICSupportMissing). The declarations and implementations change accordingly, and the call sites in Multireduction drop the now-obsolete checks.

src/TNL/Containers/Algorithms/ArrayOperations.h (+10 −10)

@@ -42,14 +42,14 @@ class ArrayOperations< Devices::Host >
    static Element getMemoryElement( const Element* data );

    template< typename Element, typename Index >
-   static bool setMemory( Element* data,
+   static void setMemory( Element* data,
                           const Element& value,
                           const Index size );

    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -93,14 +93,14 @@ class ArrayOperations< Devices::Cuda >
    static Element getMemoryElement( const Element* data );

    template< typename Element, typename Index >
-   static bool setMemory( Element* data,
+   static void setMemory( Element* data,
                           const Element& value,
                           const Index size );

    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -132,7 +132,7 @@ class ArrayOperations< Devices::Cuda, Devices::Host >
    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -152,7 +152,7 @@ class ArrayOperations< Devices::Host, Devices::Cuda >
    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -185,14 +185,14 @@ class ArrayOperations< Devices::MIC >
    static Element getMemoryElement( const Element* data );

    template< typename Element, typename Index >
-   static bool setMemory( Element* data,
+   static void setMemory( Element* data,
                           const Element& value,
                           const Index size );

    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -224,7 +224,7 @@ class ArrayOperations< Devices::MIC, Devices::Host >
    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );

@@ -244,7 +244,7 @@ class ArrayOperations< Devices::Host, Devices::MIC >
    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
-   static bool copyMemory( DestinationElement* destination,
+   static void copyMemory( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size );
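Since the new declarations differ from the old ones only in the return type, calling code keeps the same argument lists and simply stops inspecting a result. The following minimal sketch of the host specialization in use is not part of the patch; the include path and the TNL::Containers::Algorithms / TNL::Devices namespaces are assumptions based on the file layout above.

// Hedged sketch, not from the patch: exercises the new void-returning API.
// Assumes ArrayOperations is reachable via the header below and lives in
// TNL::Containers::Algorithms, with Devices::Host in namespace TNL.
#include <iostream>
#include <TNL/Containers/Algorithms/ArrayOperations.h>

using namespace TNL;
using namespace TNL::Containers::Algorithms;

int main()
{
   const int size = 8;
   double* a = new double[ size ];
   double* b = new double[ size ];

   // Before the patch both calls returned bool; now they return void and
   // any error reporting happens inside the functions themselves.
   ArrayOperations< Devices::Host >::setMemory( a, 3.14, size );
   ArrayOperations< Devices::Host >::copyMemory( b, a, size );

   std::cout << b[ size - 1 ] << std::endl;   // prints 3.14

   delete[] a;
   delete[] b;
   return 0;
}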
src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h (+7 −23)

@@ -100,7 +100,7 @@ setArrayValueCudaKernel( Element* data,
 #endif

 template< typename Element, typename Index >
-bool
+void
 ArrayOperations< Devices::Cuda >::
 setMemory( Element* data,
            const Element& value,

@@ -114,7 +114,6 @@ setMemory( Element* data,
    gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
    setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
    TNL_CHECK_CUDA_DEVICE;
-   return true;
 #else
    throw Exceptions::CudaSupportMissing();
 #endif

@@ -142,7 +141,7 @@ copyMemoryCudaToCudaKernel( DestinationElement* destination,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::Cuda >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -158,7 +157,6 @@ copyMemory( DestinationElement* destination,
                   size * sizeof( DestinationElement ),
                   cudaMemcpyDeviceToDevice );
       TNL_CHECK_CUDA_DEVICE;
-      return true;
    }
    else
    {

@@ -168,7 +166,6 @@ copyMemory( DestinationElement* destination,
       gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
       copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
       TNL_CHECK_CUDA_DEVICE;
-      return true;
    }
 #else
    throw Exceptions::CudaSupportMissing();

@@ -236,7 +233,7 @@ containsOnlyValue( const Element* data,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::Host, Devices::Cuda >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -253,7 +250,6 @@ copyMemory( DestinationElement* destination,
                       cudaMemcpyDeviceToHost ) != cudaSuccess )
          std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
       TNL_CHECK_CUDA_DEVICE;
-      return true;
    }
    else
    {

@@ -265,11 +261,8 @@ copyMemory( DestinationElement* destination,
                          (void*) &source[ i ],
                          min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
-         {
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
             TNL_CHECK_CUDA_DEVICE;
-            return true;
-         }
          Index j( 0 );
          while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
          {

@@ -279,7 +272,6 @@ copyMemory( DestinationElement* destination,
          i += j;
       }
    }
-   return true;
 #else
    throw Exceptions::CudaSupportMissing();
 #endif

@@ -311,11 +303,8 @@ compareMemory( const Element1* destination,
                          (void*) &source[ compared ],
                          transfer * sizeof( Element2 ),
                          cudaMemcpyDeviceToHost ) != cudaSuccess )
-         {
             std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
             TNL_CHECK_CUDA_DEVICE;
-            return true;
-         }
          if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer.get(), transfer ) )
             return false;
          compared += transfer;

@@ -332,7 +321,7 @@ compareMemory( const Element1* destination,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::Cuda, Devices::Host >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -350,7 +339,6 @@ copyMemory( DestinationElement* destination,
                       cudaMemcpyHostToDevice ) != cudaSuccess )
          std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
       TNL_CHECK_CUDA_DEVICE;
-      return true;
    }
    else
    {

@@ -368,14 +356,10 @@ copyMemory( DestinationElement* destination,
                          (void*) buffer.get(),
                          j * sizeof( DestinationElement ),
                          cudaMemcpyHostToDevice ) != cudaSuccess )
-         {
             std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
             TNL_CHECK_CUDA_DEVICE;
-            return true;
-         }
          i += j;
       }
-      return true;
    }
 #else
    throw Exceptions::CudaSupportMissing();
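With the CUDA specializations returning void, a caller can no longer detect a failed transfer from the return value; as the hunks above show, the remaining error channels are the std::cerr messages, TNL_CHECK_CUDA_DEVICE, and the Exceptions::CudaSupportMissing thrown when TNL is built without CUDA. The sketch below is not from the patch; the include paths and namespaces are assumptions, and it only demonstrates handling the "built without CUDA" case.

// Hedged sketch, not part of the patch. `deviceData` is expected to point to
// GPU memory allocated elsewhere (e.g. with cudaMalloc); include paths and
// namespaces are assumptions based on the file layout of this diff.
#include <iostream>
#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

using namespace TNL;
using namespace TNL::Containers::Algorithms;

void fillOnDevice( double* deviceData, int size )
{
   try {
      // Returns void now; a failed kernel launch is reported through
      // TNL_CHECK_CUDA_DEVICE inside setMemory, not through a bool.
      ArrayOperations< Devices::Cuda >::setMemory( deviceData, 1.0, size );
   }
   catch( const Exceptions::CudaSupportMissing& ) {
      std::cerr << "TNL was built without CUDA support." << std::endl;
   }
}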
src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h (+2 −4)

@@ -62,7 +62,7 @@ getMemoryElement( const Element* data )
 }

 template< typename Element, typename Index >
-bool
+void
 ArrayOperations< Devices::Host >::
 setMemory( Element* data,
            const Element& value,

@@ -70,13 +70,12 @@ setMemory( Element* data,
 {
    for( Index i = 0; i < size; i ++ )
       data[ i ] = value;
-   return true;
 }

 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::Host >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -100,7 +99,6 @@ copyMemory( DestinationElement* destination,
    else
       for( Index i = 0; i < size; i ++ )
          destination[ i ] = ( DestinationElement ) source[ i ];
-   return true;
 }

 template< typename DestinationElement,
src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h (+4 −20)

@@ -78,7 +78,7 @@ getMemoryElement( const Element* data )
 }

 template< typename Element, typename Index >
-bool
+void
 ArrayOperations< Devices::MIC >::
 setMemory( Element* data,
            const Element& value,

@@ -95,7 +95,6 @@ setMemory( Element* data,
       for(int i=0;i<size;i++)
          dst[i]=tmp;
    }
-   return true;
 #else
    throw Exceptions::MICSupportMissing();
 #endif

@@ -104,7 +103,7 @@ setMemory( Element* data,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::MIC >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -123,7 +122,6 @@ copyMemory( DestinationElement* destination,
       {
          memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement));
       }
-      return true;
    }
    else
    {

@@ -136,13 +134,10 @@ copyMemory( DestinationElement* destination,
          for(int i=0;i<size;i++)
             dst_ptr.pointer[i]=src_ptr.pointer[i];
       }
-      return true;
    }
 #else
    throw Exceptions::MICSupportMissing();
 #endif
-   return false;
 }

 template< typename Element1,

@@ -242,7 +237,7 @@ containsOnlyValue( const Element* data,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::Host, Devices::MIC >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -267,7 +262,6 @@ copyMemory( DestinationElement* destination,
       }
       memcpy((void*)destination,(void*)&tmp,size*sizeof(SourceElement));
-      return true;
    }
    else
    {

@@ -277,7 +271,6 @@ copyMemory( DestinationElement* destination,
       {
          memcpy((void*)tmp,src_ptr.pointer,size*sizeof(SourceElement));
       }
-      return true;
    }
 }
 else

@@ -297,7 +290,6 @@ copyMemory( DestinationElement* destination,
       }
       memcpy((void*)destination,(void*)&tmp,size*sizeof(DestinationElement));
-      return true;
    }
    else
    {

@@ -309,10 +301,8 @@ copyMemory( DestinationElement* destination,
          for(int i=0;i<size;i++)
             dst[i]=src_ptr.pointer[i];
       }
-      return true;
    }
 }
-   return false;
 #else
    throw Exceptions::MICSupportMissing();
 #endif

@@ -368,7 +358,7 @@ compareMemory( const Element1* destination,
 template< typename DestinationElement,
           typename SourceElement,
           typename Index >
-bool
+void
 ArrayOperations< Devices::MIC, Devices::Host >::
 copyMemory( DestinationElement* destination,
             const SourceElement* source,

@@ -393,8 +383,6 @@ copyMemory( DestinationElement* destination,
       {
          memcpy(dst_ptr.pointer,(void*)&tmp,size*sizeof(SourceElement));
       }
-      return true;
    }
    else
    {

@@ -404,7 +392,6 @@ copyMemory( DestinationElement* destination,
       {
          memcpy(dst_ptr.pointer,(void*)tmp,size*sizeof(SourceElement));
       }
-      return true;
    }
 }
 else

@@ -423,7 +410,6 @@ copyMemory( DestinationElement* destination,
       for(int i=0;i<size;i++)
          dst_ptr.pointer[i]=src[i];
    }
-   return true;
 }
 else
 {

@@ -435,10 +421,8 @@ copyMemory( DestinationElement* destination,
          for(int i=0;i<size;i++)
             dst_ptr.pointer[i]=src[i];
       }
-      return true;
    }
 }
-   return false;
 #else
    throw Exceptions::MICSupportMissing();
 #endif
src/TNL/Containers/Algorithms/Multireduction_impl.h (+3 −6)

@@ -75,13 +75,11 @@ reduce( Operation& operation,
    */
   if( n * ldInput1 < Multireduction_minGpuDataSize ) {
      DataType1 hostArray1[ Multireduction_minGpuDataSize ];
-     if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, n * ldInput1 ) )
-        return false;
+     ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, n * ldInput1 );
      if( deviceInput2 ) {
         using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
         _DT2 hostArray2[ Multireduction_minGpuDataSize ];
-        if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ) )
-           return false;
+        ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size );
         return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult );
      }
      else {

@@ -117,8 +115,7 @@ reduce( Operation& operation,
    * Transfer the reduced data from device to host.
    */
   ResultType resultArray[ n * reducedSize ];
-  if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, n * reducedSize ) )
-     return false;
+  ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, n * reducedSize );

 #ifdef CUDA_REDUCTION_PROFILING
    timer.stop();
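The Multireduction hunks above show the pattern every remaining call site has to follow: once copyMemory returns void, `! copyMemory( ... )` is ill-formed, so the old bool-checking idiom does not merely become unnecessary, it stops compiling. A hypothetical call site (names are illustrative, not from TNL) before and after:

// Hedged sketch, not from the patch; assumes the same includes and
// using-directives as the sketches above.
#include <TNL/Containers/Algorithms/ArrayOperations.h>

using namespace TNL;
using namespace TNL::Containers::Algorithms;

void gatherPartialResults( double* hostBuffer, const double* deviceBuffer, int n )
{
   // before this patch:
   //    if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostBuffer, deviceBuffer, n ) )
   //       return false;   // success reported through the bool return value
   //
   // after this patch the call stands alone; errors are handled inside copyMemory:
   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostBuffer, deviceBuffer, n );
}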