diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h
index 9ce1a109ac09766125aefd9807aa97462789feaf..129a54cbe0cf47499fa5faa5dab45ad09b50834e 100644
--- a/src/TNL/Matrices/Matrix.h
+++ b/src/TNL/Matrices/Matrix.h
@@ -109,13 +109,6 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index
    return str;
 }
 
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                              const InVector& inVector,
-                              OutVector& outVector );
-
 } // namespace Matrices
 } // namespace TNL
 
diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp
index b7e00067017ee7ef88d6506d3889410ce4f0e13f..ce5f52274ec1134f30a52b64bf1572b7d757dc84 100644
--- a/src/TNL/Matrices/Matrix.hpp
+++ b/src/TNL/Matrices/Matrix.hpp
@@ -250,54 +250,5 @@ computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector)
     }
 }
 
-#ifdef HAVE_CUDA
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix,
-                                                  const InVector* inVector,
-                                                  OutVector* outVector,
-                                                  int gridIdx )
-{
-   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
-   const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
-   if( rowIdx < matrix->getRows() )
-      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
-}
-#endif
-
-template< typename Matrix,
-          typename InVector,
-          typename OutVector >
-void MatrixVectorProductCuda( const Matrix& matrix,
-                                 const InVector& inVector,
-                                 OutVector& outVector )
-{
-#ifdef HAVE_CUDA
-   typedef typename Matrix::IndexType IndexType;
-   Matrix* kernel_this = Cuda::passToDevice( matrix );
-   InVector* kernel_inVector = Cuda::passToDevice( inVector );
-   OutVector* kernel_outVector = Cuda::passToDevice( outVector );
-   dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
-   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
-   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() );
-   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
-   {
-      if( gridIdx == cudaGrids - 1 )
-         cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize();
-      MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
-                                     ( kernel_this,
-                                       kernel_inVector,
-                                       kernel_outVector,
-                                       gridIdx );
-      TNL_CHECK_CUDA_DEVICE;
-   }
-   Cuda::freeFromDevice( kernel_this );
-   Cuda::freeFromDevice( kernel_inVector );
-   Cuda::freeFromDevice( kernel_outVector );
-   TNL_CHECK_CUDA_DEVICE;
-#endif
-}
-
 } // namespace Matrices
 } // namespace TNL