Commit 0e415b53 authored by Tomáš Oberhuber
Browse files

Replacing __device__ __host__ with __cuda_callable__.

parent 0235ef95
Loading
Loading
Loading
Loading
+19 −55
Original line number Diff line number Diff line
@@ -28,9 +28,7 @@ EllpackSymmetricGraph< Real, Device, Index > :: EllpackSymmetricGraph()
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
  __device__ __host__
#endif
__cuda_callable__
Index EllpackSymmetricGraph< Real, Device, Index >::getRowLengthsInt() const
{
    return this->rowLengths;
@@ -101,9 +99,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::setCompressedRowLengths( cons
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
Index EllpackSymmetricGraph< Real, Device, Index >::getRowsOfColor( IndexType color ) const
{
   return this->colorPointers.getElement( color + 1 ) - this->colorPointers.getElement( color );
@@ -178,9 +174,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::computeColorsVector( Containe
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
void EllpackSymmetricGraph< Real, Device, Index >::computePermutationArray()
{
   // init vector of colors and permutation array
@@ -244,9 +238,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::verifyPermutationArray()
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetricGraph< Real, Device, Index >::rearrangeMatrix( bool verbose )
{
   // first we need to know permutation
@@ -303,9 +295,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::rearrangeMatrix( bool verbose
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
  __device__ __host__
#endif
__cuda_callable__
Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getPermutationArray()
{
    return this->permutationArray;
@@ -314,9 +304,7 @@ Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
  __device__ __host__
#endif
__cuda_callable__
Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getInversePermutation()
{
    return this->inversePermutationArray;
@@ -325,9 +313,7 @@ Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
  __device__ __host__
#endif
__cuda_callable__
Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getColorPointers()
{
    return this->colorPointers;
@@ -336,9 +322,7 @@ Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
  __device__ __host__
#endif
__cuda_callable__
void EllpackSymmetricGraph< Real, Device, Index >::copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix )
{
    //  TODO: fix
@@ -432,9 +416,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::copyFrom( const Matrix& matri
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetricGraph< Real, Device, Index > :: setElementFast( const IndexType row,
                                                                     const IndexType column,
                                                                     const Real& value )
@@ -456,9 +438,7 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: setElement( const IndexType
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetricGraph< Real, Device, Index > :: addElementFast( const IndexType row,
                                                                     const IndexType column,
                                                                     const RealType& value,
@@ -547,9 +527,7 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: addElement( const IndexType
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetricGraph< Real, Device, Index > :: setRowFast( const IndexType row,
                                                                 const IndexType* columnIndexes,
                                                                 const RealType* values,
@@ -615,9 +593,7 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: setRow( const IndexType row
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetricGraph< Real, Device, Index > :: addRowFast( const IndexType row,
                                                                 const IndexType* columns,
                                                                 const RealType* values,
@@ -644,9 +620,7 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: addRow( const IndexType row
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
Real EllpackSymmetricGraph< Real, Device, Index >::getElementFast( const IndexType row,
                                                                   const IndexType column ) const
{
@@ -695,9 +669,7 @@ Real EllpackSymmetricGraph< Real, Device, Index >::getElement( const IndexType r
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
void EllpackSymmetricGraph< Real, Device, Index >::getRowFast( const IndexType row,
                                                               IndexType* columns,
                                                               RealType* values ) const
@@ -739,9 +711,7 @@ template< typename Real,
          typename Device,
          typename Index >
  template< typename Vector >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
typename Vector::RealType EllpackSymmetricGraph< Real, Device, Index >::rowVectorProduct( const IndexType row,
                                                                                          const Vector& vector ) const
{
@@ -936,7 +906,7 @@ template< typename Real,
          typename Index >
template< typename InVector,
          typename OutVector >
__device__
__cuda_callable__
void EllpackSymmetricGraph< Real, Device, Index >::spmvCuda( const InVector& inVector,
                                                             OutVector& outVector,
                                                             const int globalIdx,
@@ -990,9 +960,7 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getRowBegin( const EllpackSymmetricGraph< Real, Device, Index >& matrix,
                                const Index row )
      {
@@ -1001,9 +969,7 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getRowEnd( const EllpackSymmetricGraph< Real, Device, Index >& matrix,
                                const Index row )
      {
@@ -1012,9 +978,7 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getElementStep( const EllpackSymmetricGraph< Real, Device, Index >& matrix )
      {
         return matrix.alignedRows;
+11 −31
Original line number Diff line number Diff line
@@ -163,9 +163,7 @@ bool EllpackSymmetric< Real, Device, Index >::copyFrom( const Matrix& matrix,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetric< Real, Device, Index > :: setElementFast( const IndexType row,
                                                                const IndexType column,
                                                                const Real& value )
@@ -187,9 +185,7 @@ bool EllpackSymmetric< Real, Device, Index > :: setElement( const IndexType row,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetric< Real, Device, Index > :: addElementFast( const IndexType row,
                                                                const IndexType column,
                                                                const RealType& value,
@@ -285,9 +281,7 @@ bool EllpackSymmetric< Real, Device, Index > :: addElement( const IndexType row,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetric< Real, Device, Index > :: setRowFast( const IndexType row,
                                                            const IndexType* columnIndexes,
                                                            const RealType* values,
@@ -353,9 +347,7 @@ bool EllpackSymmetric< Real, Device, Index > :: setRow( const IndexType row,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
bool EllpackSymmetric< Real, Device, Index > :: addRowFast( const IndexType row,
                                                            const IndexType* columns,
                                                            const RealType* values,
@@ -382,9 +374,7 @@ bool EllpackSymmetric< Real, Device, Index > :: addRow( const IndexType row,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
Real EllpackSymmetric< Real, Device, Index >::getElementFast( const IndexType row,
                                                              const IndexType column ) const
{
@@ -430,9 +420,7 @@ Real EllpackSymmetric< Real, Device, Index >::getElement( const IndexType row,
template< typename Real,
          typename Device,
          typename Index >
#ifdef HAVE_CUDA
   __device__ __host__
#endif
__cuda_callable__
void EllpackSymmetric< Real, Device, Index >::getRowFast( const IndexType row,
                                                          IndexType* columns,
                                                          RealType* values ) const
@@ -693,9 +681,7 @@ template< typename Real,
        typename Device,
        typename Index >
template< typename Vector >
#ifdef HAVE_CUDA
__device__ __host__
#endif
__cuda_callable__
typename Vector::RealType EllpackSymmetric< Real, Device, Index >::rowVectorProduct( const IndexType row,
                                                                                     const Vector& vector ) const
{
@@ -719,7 +705,7 @@ template< typename Real,
          typename Index >
template< typename InVector,
          typename OutVector >
__device__
__cuda_callable__
void EllpackSymmetric< Real, Device, Index >::spmvCuda( const InVector& inVector,
                                                           OutVector& outVector,
                                                           int rowId ) const
@@ -766,9 +752,7 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getRowBegin( const EllpackSymmetric< Real, Device, Index >& matrix,
                                const Index row )
      {
@@ -777,9 +761,7 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getRowEnd( const EllpackSymmetric< Real, Device, Index >& matrix,
                                const Index row )
      {
@@ -789,9 +771,7 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda >

      template< typename Real,
                typename Index >
#ifdef HAVE_CUDA
      __device__ __host__
#endif
      __cuda_callable__
      static Index getElementStep( const EllpackSymmetric< Real, Device, Index >& matrix )
      {
         return matrix.alignedRows;
+3 −11
Original line number Diff line number Diff line
@@ -55,10 +55,7 @@ public:
   __cuda_callable__
   IndexType getColumns() const;

#ifdef HAVE_CUDA
    __device__ __host__
#endif
    
   __cuda_callable__    
   const IndexType& getNumberOfColors() const;

   /****
@@ -110,14 +107,9 @@ public:

   bool help( bool verbose = false ) { return true;};

#ifdef  HAVE_CUDA
   __device__ __host__
#endif
   void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   Index getValuesSize() const;

   protected:
+9 −24
Original line number Diff line number Diff line
@@ -77,9 +77,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
   template< typename Real2, typename Device2, typename Index2 >
   bool operator != ( const SlicedEllpackSymmetric< Real2, Device2, Index2 >& matrix ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value );
@@ -88,9 +86,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
                    const IndexType column,
                    const RealType& value );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value,
@@ -101,9 +97,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
                    const RealType& value,
                    const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setRowFast( const IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
@@ -114,9 +108,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
                const RealType* values,
                const IndexType elements );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addRowFast( const IndexType row,
                    const IndexType* columns,
                    const RealType* values,
@@ -129,18 +121,15 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >
                const IndexType numberOfElements,
                const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   RealType getElementFast( const IndexType row,
                            const IndexType column ) const;

   RealType getElement( const IndexType row,
                        const IndexType column ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif

   __cuda_callable__
   void getRowFast( const IndexType row,
                    IndexType* columns,
                    RealType* values ) const;
@@ -151,9 +140,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >

   template< typename InVector,
             typename OutVector >
   #ifdef HAVE_CUDA
      __device__ __host__
   #endif
   __cuda_callable__
   void rowVectorProduct( const IndexType row,
                          const InVector& inVector,
                          OutVector& outVector ) const;
@@ -165,9 +152,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index >

   template< typename InVector,
             typename OutVector >
#ifdef HAVE_CUDA
   __device__
#endif
   __cuda_callable__
   void spmvCuda( const InVector& inVector,
                  OutVector& outVector,
                  int globalIdx ) const;
+10 −31
Original line number Diff line number Diff line
@@ -81,9 +81,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
             typename OutVector >
   void vectorProductHost( const InVector& inVector, OutVector& outVector ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value );
@@ -92,9 +90,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                    const IndexType column,
                    const RealType& value );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addElementFast( const IndexType row,
                        const IndexType column,
                        const RealType& value,
@@ -105,9 +101,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                    const RealType& value,
                    const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool setRowFast( const IndexType row,
                    const IndexType* columnIndexes,
                    const RealType* values,
@@ -118,9 +112,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                const RealType* values,
                const IndexType elements );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool addRowFast( const IndexType row,
                    const IndexType* columns,
                    const RealType* values,
@@ -133,18 +125,14 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                const IndexType numberOfElements,
                const RealType& thisElementMultiplicator = 1.0 );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   RealType getElementFast( const IndexType row,
                            const IndexType column ) const;

   RealType getElement( const IndexType row,
                        const IndexType column ) const;

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   void getRowFast( const IndexType row,
                    IndexType* columns,
                    RealType* values ) const;
@@ -154,9 +142,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                RealType* values ) const;

   template< typename Vector >
   #ifdef HAVE_CUDA
      __device__ __host__
   #endif
   __cuda_callable__
   typename Vector::RealType rowVectorProduct( const IndexType row,
                                               const Vector& vector ) const;

@@ -206,19 +192,12 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index >
                  const int color ) const;
#endif

#ifdef HAVE_CUDA
  __device__ __host__
#endif
    void copyFromHostToCuda( SlicedEllpackSymmetricGraph< Real, Devices::Host, Index, SliceSize >& matrix );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   bool rearrangeMatrix( bool verbose = false );

#ifdef HAVE_CUDA
   __device__ __host__
#endif
   __cuda_callable__
   void computePermutationArray();

   Containers::Vector< Index, Device, Index > getSlicePointers();
Loading