Loading src/TNL/Assert.h +2 −2 Original line number Diff line number Diff line Loading @@ -282,7 +282,7 @@ TNL_IMPL_CMP_HELPER_( GT, > ); } // namespace TNL // Internal macro wrapping the __PRETTY_FUNCTION__ "magic". #if defined( __NVCC__ ) && ( __CUDACC_VER__ < 80000 ) #if defined( __NVCC__ ) && ( __CUDACC_VER_MAJOR__ < 8 ) #define __TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)" #else #define __TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__ Loading src/TNL/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL ) set( headers Assert.h Constants.h CudaSharedMemory.h CudaStreamPool.h Curve.h DevicePointer.h Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +4 −4 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/reduction-operations.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -201,9 +201,9 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result; Algorithms::tnlParallelReductionEqualities< Element1, Index > reductionEqualities; reductionOnCudaDevice( reductionEqualities, size, destination, source, result ); bool result = false; Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result ); return result; } Loading src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h +3 −3 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ #include <TNL/Exceptions/MICBadAlloc.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/reduction-operations.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -191,9 +191,9 @@ compareMemory( const Element1* destination, } else { Devices::MICHider<const Element1> src_ptr; Devices::MICHider<const Element2> src_ptr; src_ptr.pointer=source; Devices::MICHider<const Element2> dst_ptr; Devices::MICHider<const Element1> dst_ptr; dst_ptr.pointer=destination; bool ret=false; #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) Loading src/TNL/Containers/Algorithms/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -6,7 +6,6 @@ set( headers ArrayOperations.h ArrayOperationsMIC_impl.h cuda-prefix-sum.h cuda-prefix-sum_impl.h reduction-operations.h CublasWrapper.h CudaMultireductionKernel.h CudaReductionBuffer.h Loading @@ -15,6 +14,7 @@ set( headers ArrayOperations.h Multireduction_impl.h Reduction.h Reduction_impl.h ReductionOperations.h VectorOperations.h VectorOperationsHost_impl.h VectorOperationsCuda_impl.h Loading Loading
src/TNL/Assert.h +2 −2 Original line number Diff line number Diff line Loading @@ -282,7 +282,7 @@ TNL_IMPL_CMP_HELPER_( GT, > ); } // namespace TNL // Internal macro wrapping the __PRETTY_FUNCTION__ "magic". #if defined( __NVCC__ ) && ( __CUDACC_VER__ < 80000 ) #if defined( __NVCC__ ) && ( __CUDACC_VER_MAJOR__ < 8 ) #define __TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)" #else #define __TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__ Loading
src/TNL/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,7 @@ SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL ) set( headers Assert.h Constants.h CudaSharedMemory.h CudaStreamPool.h Curve.h DevicePointer.h Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +4 −4 Original line number Diff line number Diff line Loading @@ -18,7 +18,7 @@ #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/reduction-operations.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -201,9 +201,9 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); //TODO: The parallel reduction on the CUDA device with different element types is needed. bool result; Algorithms::tnlParallelReductionEqualities< Element1, Index > reductionEqualities; reductionOnCudaDevice( reductionEqualities, size, destination, source, result ); bool result = false; Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result ); return result; } Loading
src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h +3 −3 Original line number Diff line number Diff line Loading @@ -20,7 +20,7 @@ #include <TNL/Exceptions/MICBadAlloc.h> #include <TNL/Containers/Algorithms/ArrayOperations.h> #include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Containers/Algorithms/reduction-operations.h> #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { namespace Containers { Loading Loading @@ -191,9 +191,9 @@ compareMemory( const Element1* destination, } else { Devices::MICHider<const Element1> src_ptr; Devices::MICHider<const Element2> src_ptr; src_ptr.pointer=source; Devices::MICHider<const Element2> dst_ptr; Devices::MICHider<const Element1> dst_ptr; dst_ptr.pointer=destination; bool ret=false; #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) Loading
src/TNL/Containers/Algorithms/CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -6,7 +6,6 @@ set( headers ArrayOperations.h ArrayOperationsMIC_impl.h cuda-prefix-sum.h cuda-prefix-sum_impl.h reduction-operations.h CublasWrapper.h CudaMultireductionKernel.h CudaReductionBuffer.h Loading @@ -15,6 +14,7 @@ set( headers ArrayOperations.h Multireduction_impl.h Reduction.h Reduction_impl.h ReductionOperations.h VectorOperations.h VectorOperationsHost_impl.h VectorOperationsCuda_impl.h Loading