Loading src/TNL/Algorithms/CudaScanKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -277,7 +277,7 @@ struct CudaScanKernelLauncher elementsInBlock, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ], &blockSums[ gridIdx * maxGridSize() ] ); &blockSums.getData()[ gridIdx * maxGridSize() ] ); } // synchronize the null-stream after all grids Loading src/TNL/Algorithms/Scan.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -225,8 +225,8 @@ perform( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::perform( end - begin, &v[ begin ], // input &v[ begin ], // output &v.getData()[ begin ], // input &v.getData()[ begin ], // output reduction, zero ); #else Loading @@ -251,8 +251,8 @@ performFirstPhase( Vector& v, return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase( end - begin, &v[ begin ], // input &v[ begin ], // output &v.getData()[ begin ], // input &v.getData()[ begin ], // output reduction, zero ); #else Loading @@ -279,7 +279,7 @@ performSecondPhase( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase( end - begin, &v[ begin ], // output &v.getData()[ begin ], // output blockShifts.getData(), reduction, shift ); Loading src/TNL/Containers/Array.h +8 −2 Original line number Diff line number Diff line Loading @@ -446,7 +446,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. 
Loading @@ -460,7 +463,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. Loading src/TNL/Containers/Array.hpp +12 −0 Original line number Diff line number Diff line Loading @@ -509,6 +509,12 @@ Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; Loading @@ -523,6 +529,12 @@ const Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." 
); return this->data[ i ]; Loading src/TNL/Containers/ArrayView.h +8 −2 Original line number Diff line number Diff line Loading @@ -347,7 +347,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. Loading @@ -362,7 +365,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. Loading Loading
src/TNL/Algorithms/CudaScanKernel.h +1 −1 Original line number Diff line number Diff line Loading @@ -277,7 +277,7 @@ struct CudaScanKernelLauncher elementsInBlock, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ], &blockSums[ gridIdx * maxGridSize() ] ); &blockSums.getData()[ gridIdx * maxGridSize() ] ); } // synchronize the null-stream after all grids Loading
src/TNL/Algorithms/Scan.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -225,8 +225,8 @@ perform( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::perform( end - begin, &v[ begin ], // input &v[ begin ], // output &v.getData()[ begin ], // input &v.getData()[ begin ], // output reduction, zero ); #else Loading @@ -251,8 +251,8 @@ performFirstPhase( Vector& v, return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase( end - begin, &v[ begin ], // input &v[ begin ], // output &v.getData()[ begin ], // input &v.getData()[ begin ], // output reduction, zero ); #else Loading @@ -279,7 +279,7 @@ performSecondPhase( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase( end - begin, &v[ begin ], // output &v.getData()[ begin ], // output blockShifts.getData(), reduction, shift ); Loading
src/TNL/Containers/Array.h +8 −2 Original line number Diff line number Diff line Loading @@ -446,7 +446,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. Loading @@ -460,7 +463,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. Loading
src/TNL/Containers/Array.hpp +12 −0 Original line number Diff line number Diff line Loading @@ -509,6 +509,12 @@ Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { #ifdef __CUDA_ARCH__ TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; Loading @@ -523,6 +529,12 @@ const Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { #ifdef __CUDA_ARCH__ TNL_ASSERT( ( std::is_same< Device, Devices::Cuda >::value ), "Attempt to access data not allocated on CUDA device from CUDA device." ); #else TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Sequential >::value ), "Attempt to access data not allocated on the host from the host." ); #endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; Loading
src/TNL/Containers/ArrayView.h +8 −2 Original line number Diff line number Diff line Loading @@ -347,7 +347,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. Loading @@ -362,7 +365,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be * called only from device kernels. * called only from device kernels. If NDEBUG is not defined, assertions * inside this method perform runtime checks for cross-device memory * accesses, which lead to a segmentation fault. If you only need * pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. Loading