Replacing __device__ __host__ with __cuda_callable__. (f4144f9c) · Commits · TNL / tnl-dev

src/TNL/Matrices/BiEllpack.h

+10 −30

Original line number	Diff line number	Diff line
		@@ -66,9 +66,7 @@ public:
		const IndexType column,
		const RealType& value );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool setElementFast( const IndexType row,
		const IndexType column,
		const RealType& value );
		@@ -78,9 +76,7 @@ public:
		const RealType& value,
		const RealType& thisElementMultiplicator = 1.0 );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool addElementFast( const IndexType row,
		const IndexType column,
		const RealType& value,
		@@ -100,9 +96,7 @@ public:
		RealType getElement( const IndexType row,
		const IndexType column ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		RealType getElementFast( const IndexType row,
		const IndexType column ) const;

		@@ -111,9 +105,7 @@ public:
		IndexType* columns,
		RealType* values ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getGroupLength( const IndexType strip,
		const IndexType group ) const;

		@@ -129,9 +121,7 @@ public:

		void setVirtualRows(const IndexType rows);

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getNumberOfGroups( const IndexType row ) const;

		bool vectorProductTest() const;
		@@ -155,36 +145,26 @@ public:

		template< typename InVector,
		typename OutVector >
		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void spmvCuda( const InVector& inVector,
		OutVector& outVector,
		/*const IndexType warpStart,
		const IndexType inWarpIdx*/
		int globalIdx ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getStripLength( const IndexType strip ) const;

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType strip );

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType numberOfStrips,
		const IndexType strip );

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		IndexType power( const IndexType number,
		const IndexType exponent ) const;

src/TNL/Matrices/BiEllpackSymmetric.h

+10 −30

Original line number	Diff line number	Diff line
		@@ -56,9 +56,7 @@ public:
		const IndexType column,
		const RealType& value );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool setElementFast( const IndexType row,
		const IndexType column,
		const RealType& value );
		@@ -68,9 +66,7 @@ public:
		const RealType& value,
		const RealType& thisElementMultiplicator = 1.0 );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool addElementFast( const IndexType row,
		const IndexType column,
		const RealType& value,
		@@ -90,9 +86,7 @@ public:
		RealType getElement( const IndexType row,
		const IndexType column ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		RealType getElementFast( const IndexType row,
		const IndexType column ) const;

		@@ -100,9 +94,7 @@ public:
		IndexType* columns,
		RealType* values ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getGroupLength( const IndexType strip,
		const IndexType group ) const;

		@@ -118,9 +110,7 @@ public:

		void setVirtualRows(const IndexType rows);

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getNumberOfGroups( const IndexType row ) const;

		bool vectorProductTest() const;
		@@ -144,36 +134,26 @@ public:

		template< typename InVector,
		typename OutVector >
		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void spmvCuda( const InVector& inVector,
		OutVector& outVector,
		/*const IndexType warpStart,
		const IndexType inWarpIdx*/
		int globalIdx ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		IndexType getStripLength( const IndexType strip ) const;

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void performRowBubbleSortCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType strip );

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void computeColumnSizesCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType numberOfStrips,
		const IndexType strip );

		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		IndexType power( const IndexType number,
		const IndexType exponent ) const;

src/TNL/Matrices/BiEllpackSymmetric_impl.h

+7 −21

Original line number	Diff line number	Diff line
		@@ -22,9 +22,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpackSymmetric< Real, Device, Index, StripSize >::power( const IndexType number,
		const IndexType exponent ) const
		{
		@@ -127,9 +125,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const
		{
		TNL_ASSERT( strip >= 0,
		@@ -144,9 +140,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const
		{
		TNL_ASSERT( row >=0 && row < this->getRows(),
		@@ -251,9 +245,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setElementFast( const IndexType row,
		const IndexType column,
		const RealType& value )
		@@ -311,9 +303,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool BiEllpackSymmetric< Real, Device, Index, StripSize >::addElementFast( const IndexType row,
		const IndexType column,
		const RealType& value,
		@@ -488,9 +478,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Real BiEllpackSymmetric< Real, Device, Index, StripSize >::getElementFast( const IndexType row,
		const IndexType column ) const
		{
		@@ -584,9 +572,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip,
		const Index group ) const
		{

src/TNL/Matrices/BiEllpack_impl.h

+9 −23

Original line number	Diff line number	Diff line
		@@ -24,9 +24,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpack< Real, Device, Index, StripSize >::power( const IndexType number,
		const IndexType exponent ) const
		{
		@@ -131,9 +129,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpack< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const
		{
		TNL_ASSERT( strip >= 0,
		@@ -148,9 +144,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const
		{
		TNL_ASSERT( row >=0 && row < this->getRows(),
		@@ -256,9 +250,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool BiEllpack< Real, Device, Index, StripSize >::setElementFast( const IndexType row,
		const IndexType column,
		const RealType& value )
		@@ -316,9 +308,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool BiEllpack< Real, Device, Index, StripSize >::addElementFast( const IndexType row,
		const IndexType column,
		const RealType& value,
		@@ -492,9 +482,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Real BiEllpack< Real, Device, Index, StripSize >::getElementFast( const IndexType row,
		const IndexType column ) const
		{
		@@ -588,9 +576,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip,
		const Index group ) const
		{
		@@ -1321,7 +1307,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		__device__
		__cuda_callable__
		void BiEllpack< Real, Device, Index, StripSize >::performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType strip )
		{
		@@ -1378,7 +1364,7 @@ template< typename Real,
		typename Device,
		typename Index,
		int StripSize >
		__device__
		__cuda_callable__
		void BiEllpack< Real, Device, Index, StripSize >::computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
		const IndexType numberOfStrips,
		const IndexType strip )

src/TNL/Matrices/EllpackSymmetric.h

+9 −25

Original line number	Diff line number	Diff line
		@@ -65,9 +65,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		bool copyFrom( const Matrix& matrix,
		const CompressedRowLengthsVector& rowLengths );*/

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool setElementFast( const IndexType row,
		const IndexType column,
		const RealType& value );
		@@ -76,9 +74,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		const IndexType column,
		const RealType& value );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool addElementFast( const IndexType row,
		const IndexType column,
		const RealType& value,
		@@ -90,9 +86,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		const RealType& thisElementMultiplicator = 1.0 );


		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool setRowFast( const IndexType row,
		const IndexType* columnIndexes,
		const RealType* values,
		@@ -104,9 +98,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		const IndexType elements );


		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		bool addRowFast( const IndexType row,
		const IndexType* columns,
		const RealType* values,
		@@ -119,18 +111,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		const IndexType numberOfElements,
		const RealType& thisElementMultiplicator = 1.0 );

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		RealType getElementFast( const IndexType row,
		const IndexType column ) const;

		RealType getElement( const IndexType row,
		const IndexType column ) const;

		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		void getRowFast( const IndexType row,
		IndexType* columns,
		RealType* values ) const;
		@@ -140,9 +128,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
		RealType* values ) const;

		template< typename Vector >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		__cuda_callable__
		typename Vector::RealType rowVectorProduct( const IndexType row,
		const Vector& vector ) const;

		@@ -183,9 +169,7 @@ template< typename Vector >

		template< typename InVector,
		typename OutVector >
		#ifdef HAVE_CUDA
		__device__
		#endif
		__cuda_callable__
		void spmvCuda( const InVector& inVector,
		OutVector& outVector,
		int rowIdx ) const;