Loading src/TNL/Matrices/Matrix.h +0 −7 Original line number Diff line number Diff line Loading @@ -109,13 +109,6 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index return str; } template< typename Matrix, typename InVector, typename OutVector > void MatrixVectorProductCuda( const Matrix& matrix, const InVector& inVector, OutVector& outVector ); } // namespace Matrices } // namespace TNL Loading src/TNL/Matrices/Matrix.hpp +0 −49 Original line number Diff line number Diff line Loading @@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) } } #ifdef HAVE_CUDA template< typename Matrix, typename InVector, typename OutVector > __global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, const InVector* inVector, OutVector* outVector, int gridIdx ) { static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < matrix->getRows() ) ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); } #endif template< typename Matrix, typename InVector, typename OutVector > void MatrixVectorProductCuda( const Matrix& matrix, const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; Matrix* kernel_this = Cuda::passToDevice( matrix ); InVector* kernel_inVector = Cuda::passToDevice( inVector ); OutVector* kernel_outVector = Cuda::passToDevice( outVector ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); MatrixVectorProductCudaKernel<<< 
cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_inVector, kernel_outVector, gridIdx ); TNL_CHECK_CUDA_DEVICE; } Cuda::freeFromDevice( kernel_this ); Cuda::freeFromDevice( kernel_inVector ); Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } } // namespace Matrices } // namespace TNL Loading
/**
 * Dispatches the matrix-vector product \p outVector = \p matrix * \p inVector
 * on a CUDA device. Defined in Matrix.hpp.
 */
template< typename Matrix, typename InVector, typename OutVector >
void
MatrixVectorProductCuda( const Matrix& matrix,
                         const InVector& inVector,
                         OutVector& outVector );

} // namespace Matrices
} // namespace TNL
src/TNL/Matrices/Matrix.hpp +0 −49 Original line number Diff line number Diff line Loading @@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) } } #ifdef HAVE_CUDA template< typename Matrix, typename InVector, typename OutVector > __global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, const InVector* inVector, OutVector* outVector, int gridIdx ) { static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < matrix->getRows() ) ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); } #endif template< typename Matrix, typename InVector, typename OutVector > void MatrixVectorProductCuda( const Matrix& matrix, const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; Matrix* kernel_this = Cuda::passToDevice( matrix ); InVector* kernel_inVector = Cuda::passToDevice( inVector ); OutVector* kernel_outVector = Cuda::passToDevice( outVector ); dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_inVector, kernel_outVector, gridIdx ); TNL_CHECK_CUDA_DEVICE; } Cuda::freeFromDevice( kernel_this ); Cuda::freeFromDevice( kernel_inVector ); Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } } // namespace Matrices } // namespace TNL