diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index 9ce1a109ac09766125aefd9807aa97462789feaf..129a54cbe0cf47499fa5faa5dab45ad09b50834e 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -109,13 +109,6 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index return str; } -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ); - } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp index b7e00067017ee7ef88d6506d3889410ce4f0e13f..ce5f52274ec1134f30a52b64bf1572b7d757dc84 100644 --- a/src/TNL/Matrices/Matrix.hpp +++ b/src/TNL/Matrices/Matrix.hpp @@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) } } -#ifdef HAVE_CUDA -template< typename Matrix, - typename InVector, - typename OutVector > -__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, - const InVector* inVector, - OutVector* outVector, - int gridIdx ) -{ - static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < matrix->getRows() ) - ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); -} -#endif - -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_this, - kernel_inVector, - kernel_outVector, - gridIdx ); - TNL_CHECK_CUDA_DEVICE; - } - Cuda::freeFromDevice( kernel_this ); - Cuda::freeFromDevice( kernel_inVector ); - Cuda::freeFromDevice( kernel_outVector ); - TNL_CHECK_CUDA_DEVICE; -#endif -} - } // namespace Matrices } // namespace TNL