Commit f4144f9c authored by Tomáš Oberhuber
Browse files

Replacing __device__ __host__ with __cuda_callable__.

parent b9f067c5
Loading
Loading
Loading
Loading
+10 −30
Original line number Diff line number Diff line
@@ -66,9 +66,7 @@ public:
					 const IndexType column,
					 const RealType& value );

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	bool setElementFast( const IndexType row,
						 const IndexType column,
						 const RealType& value );
@@ -78,9 +76,7 @@ public:
					 const RealType& value,
					 const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	bool addElementFast( const IndexType row,
						 const IndexType column,
						 const RealType& value,
@@ -100,9 +96,7 @@ public:
	RealType getElement( const IndexType row,
					 	 const IndexType column ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	RealType getElementFast( const IndexType row,
							 const IndexType column ) const;

@@ -111,9 +105,7 @@ public:
			 	    IndexType* columns,
			 	    RealType* values ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getGroupLength( const IndexType strip,
							  const IndexType group ) const;

@@ -129,9 +121,7 @@ public:

	void setVirtualRows(const IndexType rows);

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getNumberOfGroups( const IndexType row ) const;

	bool vectorProductTest() const;
@@ -155,36 +145,26 @@ public:

	template< typename InVector,
			  typename OutVector >
#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void spmvCuda( const InVector& inVector,
				   OutVector& outVector,
				   /*const IndexType warpStart,
				   const IndexType inWarpIdx*/
				   int globalIdx ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getStripLength( const IndexType strip ) const;

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
										 const IndexType strip );

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
									   const IndexType numberOfStrips,
									   const IndexType strip );

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	IndexType power( const IndexType number,
				     const IndexType exponent ) const;

+10 −30
Original line number Diff line number Diff line
@@ -56,9 +56,7 @@ public:
					 const IndexType column,
					 const RealType& value );

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	bool setElementFast( const IndexType row,
						 const IndexType column,
						 const RealType& value );
@@ -68,9 +66,7 @@ public:
					 const RealType& value,
					 const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	bool addElementFast( const IndexType row,
						 const IndexType column,
						 const RealType& value,
@@ -90,9 +86,7 @@ public:
	RealType getElement( const IndexType row,
					 	 const IndexType column ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	RealType getElementFast( const IndexType row,
							 const IndexType column ) const;

@@ -100,9 +94,7 @@ public:
			 	 IndexType* columns,
			 	 RealType* values ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getGroupLength( const IndexType strip,
							  const IndexType group ) const;

@@ -118,9 +110,7 @@ public:

	void setVirtualRows(const IndexType rows);

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getNumberOfGroups( const IndexType row ) const;

	bool vectorProductTest() const;
@@ -144,36 +134,26 @@ public:

	template< typename InVector,
			  typename OutVector >
#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void spmvCuda( const InVector& inVector,
				   OutVector& outVector,
				   /*const IndexType warpStart,
				   const IndexType inWarpIdx*/
				   int globalIdx ) const;

#ifdef HAVE_CUDA
	__device__ __host__
#endif
   __cuda_callable__
	IndexType getStripLength( const IndexType strip ) const;

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void performRowBubbleSortCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
										 const IndexType strip );

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	void computeColumnSizesCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
									   const IndexType numberOfStrips,
									   const IndexType strip );

#ifdef HAVE_CUDA
	__device__
#endif
   __cuda_callable__
	IndexType power( const IndexType number,
				     const IndexType exponent ) const;

+7 −21
Original line number Diff line number Diff line
@@ -22,9 +22,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
   __cuda_callable__
Index BiEllpackSymmetric< Real, Device, Index, StripSize >::power( const IndexType number,
                                                                   const IndexType exponent ) const
{
@@ -127,9 +125,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const
{
    TNL_ASSERT( strip >= 0,
@@ -144,9 +140,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const
{
    TNL_ASSERT( row >=0 && row < this->getRows(),
@@ -251,9 +245,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setElementFast( const IndexType row,
                                                                           const IndexType column,
                                                                           const RealType& value )
@@ -311,9 +303,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
bool BiEllpackSymmetric< Real, Device, Index, StripSize >::addElementFast( const IndexType row,
                                                                           const IndexType column,
                                                                           const RealType& value,
@@ -488,9 +478,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Real BiEllpackSymmetric< Real, Device, Index, StripSize >::getElementFast( const IndexType row,
                                                                           const IndexType column ) const
{
@@ -584,9 +572,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip,
                                                                            const Index group ) const
{
+9 −23
Original line number Diff line number Diff line
@@ -24,9 +24,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpack< Real, Device, Index, StripSize >::power( const IndexType number,
							   const IndexType exponent ) const
{
@@ -131,9 +129,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpack< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const
{
	TNL_ASSERT( strip >= 0,
@@ -148,9 +144,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const
{
	TNL_ASSERT( row >=0 && row < this->getRows(),
@@ -256,9 +250,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
bool BiEllpack< Real, Device, Index, StripSize >::setElementFast( const IndexType row,
																		   const IndexType column,
																		   const RealType& value )
@@ -316,9 +308,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
bool BiEllpack< Real, Device, Index, StripSize >::addElementFast( const IndexType row,
																	   	   const IndexType column,
																	   	   const RealType& value,
@@ -492,9 +482,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Real BiEllpack< Real, Device, Index, StripSize >::getElementFast( const IndexType row,
																	   	   const IndexType column ) const
{
@@ -588,9 +576,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip,
																 	 	    const Index group ) const
{
@@ -1321,7 +1307,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
__device__
__cuda_callable__
void BiEllpack< Real, Device, Index, StripSize >::performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
																						   const IndexType strip )
{
@@ -1378,7 +1364,7 @@ template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
__device__
__cuda_callable__
void BiEllpack< Real, Device, Index, StripSize >::computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
																						 const IndexType numberOfStrips,
																						 const IndexType strip )
+9 −25
Original line number Diff line number Diff line
@@ -65,9 +65,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
   bool copyFrom( const Matrix& matrix,
                  const CompressedRowLengthsVector& rowLengths );*/

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value );
@@ -76,9 +74,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
                    const IndexType column,
                    const RealType& value );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value,
@@ -90,9 +86,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
                    const RealType& thisElementMultiplicator = 1.0 );


#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setRowFast( const IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
@@ -104,9 +98,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
                const IndexType elements );


#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addRowFast( const IndexType row,
                    const IndexType* columns,
                    const RealType* values,
@@ -119,18 +111,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
                const IndexType numberOfElements,
                const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   RealType getElementFast( const IndexType row,
                            const IndexType column ) const;

   RealType getElement( const IndexType row,
                        const IndexType column ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   void getRowFast( const IndexType row,
                    IndexType* columns,
                    RealType* values ) const;
@@ -140,9 +128,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index >
                RealType* values ) const;

   template< typename Vector >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   typename Vector::RealType rowVectorProduct( const IndexType row,
                                               const Vector& vector ) const;

@@ -183,9 +169,7 @@ template< typename Vector >

   template< typename InVector,
             typename OutVector >
#ifdef HAVE_CUDA
   __device__
#endif
   __cuda_callable__
   void spmvCuda( const InVector& inVector,
                  OutVector& outVector,
                  int rowIdx ) const;
Loading