Fixed cross-device overload of copySparseMatrix_impl (ea079456) · Commits · TNL / tnl-dev

src/TNL/Matrices/SparseOperations_impl.h

+15 −2

Original line number	Diff line number	Diff line
		@@ -161,10 +161,11 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
		}
		}

		// cross-device copy
		// cross-device copy (host -> gpu)
		template< typename Matrix1,
		typename Matrix2 >
		typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename Matrix2::DeviceType >::value >::type
		typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename Matrix2::DeviceType >::value &&
		std::is_same< typename Matrix2::DeviceType, Devices::Host >::value >::type
		copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
		{
		typename Matrix2::CudaType B_tmp;
		@@ -172,6 +173,18 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
		copySparseMatrix_impl( A, B_tmp );
		}

		// Cross-device copy, gpu -> host direction.
		//
		// This overload participates in overload resolution only when the device
		// types of Matrix1 and Matrix2 differ and Matrix2::DeviceType is
		// Devices::Cuda.
		//
		// A - destination matrix, overwritten by the final assignment.
		// B - source matrix (read-only).
		template< typename Matrix1, typename Matrix2 >
		typename std::enable_if<
		   ! std::is_same< typename Matrix1::DeviceType, typename Matrix2::DeviceType >::value &&
		   std::is_same< typename Matrix2::DeviceType, Devices::Cuda >::value
		>::type
		copySparseMatrix_impl( Matrix1& A, const Matrix2& B )
		{
		   // Stage the result in the Cuda counterpart of the destination type.
		   // NOTE(review): presumably Matrix1::CudaType lives on Devices::Cuda, so
		   // the nested call below resolves to a non-cross-device overload - confirm.
		   typename Matrix1::CudaType stagedCopy;
		   copySparseMatrix_impl( stagedCopy, B );

		   // Hand the staged matrix over to the destination through Matrix1's
		   // assignment from its CudaType.
		   A = stagedCopy;
		}

		template< typename Matrix1, typename Matrix2 >
		void
		copySparseMatrix( Matrix1& A, const Matrix2& B )