Loading src/TNL/Matrices/BiEllpack.h +10 −30 Original line number Diff line number Diff line Loading @@ -66,9 +66,7 @@ public: const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -78,9 +76,7 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -100,9 +96,7 @@ public: RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; Loading @@ -111,9 +105,7 @@ public: IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getGroupLength( const IndexType strip, const IndexType group ) const; Loading @@ -129,9 +121,7 @@ public: void setVirtualRows(const IndexType rows); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; Loading @@ -155,36 +145,26 @@ public: template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, /*const IndexType warpStart, const IndexType inWarpIdx*/ int globalIdx ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getStripLength( const IndexType strip ) const; #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ IndexType power( const IndexType number, const IndexType exponent ) const; Loading src/TNL/Matrices/BiEllpackSymmetric.h +10 −30 Original line number Diff line number Diff line Loading @@ -56,9 +56,7 @@ public: const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -68,9 +66,7 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -90,9 +86,7 @@ public: RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; Loading @@ -100,9 +94,7 @@ public: IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getGroupLength( const IndexType strip, const IndexType group ) const; Loading @@ -118,9 +110,7 @@ public: void setVirtualRows(const IndexType rows); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; Loading @@ -144,36 +134,26 @@ public: template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, /*const IndexType warpStart, const IndexType inWarpIdx*/ int globalIdx ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getStripLength( const IndexType strip ) const; #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void performRowBubbleSortCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void computeColumnSizesCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ IndexType power( const IndexType number, const IndexType exponent ) const; Loading src/TNL/Matrices/BiEllpackSymmetric_impl.h +7 −21 Original line number Diff line number Diff line Loading @@ -22,9 +22,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::power( const IndexType number, const IndexType exponent ) const { Loading Loading @@ -127,9 +125,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const { TNL_ASSERT( strip >= 0, Loading @@ -144,9 +140,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const { TNL_ASSERT( row >=0 && row < this->getRows(), Loading Loading @@ -251,9 +245,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setElementFast( const IndexType row, const IndexType column, const RealType& value ) Loading Loading @@ -311,9 +303,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading Loading @@ -488,9 +478,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Real BiEllpackSymmetric< Real, Device, Index, StripSize >::getElementFast( const IndexType row, const IndexType column ) const { Loading Loading @@ -584,9 +572,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { Loading src/TNL/Matrices/BiEllpack_impl.h +9 −23 Original line number Diff line number Diff line Loading @@ -24,9 +24,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::power( const IndexType number, const IndexType exponent ) const { Loading Loading @@ -131,9 +129,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const { TNL_ASSERT( strip >= 0, Loading @@ -148,9 +144,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const { TNL_ASSERT( row >=0 && row < this->getRows(), Loading Loading @@ -256,9 +250,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpack< Real, Device, Index, StripSize >::setElementFast( const IndexType row, const IndexType column, const RealType& value ) Loading Loading @@ -316,9 +308,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpack< Real, Device, Index, StripSize >::addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading Loading @@ -492,9 +482,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Real BiEllpack< Real, Device, Index, StripSize >::getElementFast( const IndexType row, const IndexType column ) const { Loading Loading @@ -588,9 +576,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { Loading Loading @@ -1321,7 +1307,7 @@ template< typename Real, typename Device, typename Index, int StripSize > __device__ __cuda_callable__ void BiEllpack< Real, Device, Index, StripSize >::performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ) { Loading Loading @@ -1378,7 +1364,7 @@ template< typename Real, typename Device, typename Index, int StripSize > __device__ __cuda_callable__ void BiEllpack< Real, Device, Index, StripSize >::computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ) Loading src/TNL/Matrices/EllpackSymmetric.h +9 −25 Original line number Diff line number Diff line Loading @@ -65,9 +65,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > bool copyFrom( const Matrix& matrix, const CompressedRowLengthsVector& rowLengths );*/ #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -76,9 +74,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -90,9 +86,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setRowFast( const IndexType row, const IndexType* columnIndexes, const RealType* values, Loading @@ -104,9 +98,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType elements ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addRowFast( const IndexType row, const IndexType* columns, const RealType* values, Loading @@ -119,18 +111,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType numberOfElements, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ void getRowFast( const IndexType row, IndexType* columns, RealType* values ) const; Loading @@ -140,9 +128,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > RealType* values ) const; template< typename Vector > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; Loading Loading @@ -183,9 +169,7 @@ template< typename Vector > template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, int rowIdx ) const; Loading Loading
src/TNL/Matrices/BiEllpack.h +10 −30 Original line number Diff line number Diff line Loading @@ -66,9 +66,7 @@ public: const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -78,9 +76,7 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -100,9 +96,7 @@ public: RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; Loading @@ -111,9 +105,7 @@ public: IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getGroupLength( const IndexType strip, const IndexType group ) const; Loading @@ -129,9 +121,7 @@ public: void setVirtualRows(const IndexType rows); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; Loading @@ -155,36 +145,26 @@ public: template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, /*const IndexType warpStart, const IndexType inWarpIdx*/ int globalIdx ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getStripLength( const IndexType strip ) const; #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ IndexType power( const IndexType number, const IndexType exponent ) const; Loading
src/TNL/Matrices/BiEllpackSymmetric.h +10 −30 Original line number Diff line number Diff line Loading @@ -56,9 +56,7 @@ public: const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -68,9 +66,7 @@ public: const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -90,9 +86,7 @@ public: RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; Loading @@ -100,9 +94,7 @@ public: IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getGroupLength( const IndexType strip, const IndexType group ) const; Loading @@ -118,9 +110,7 @@ public: void setVirtualRows(const IndexType rows); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; Loading @@ -144,36 +134,26 @@ public: template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, /*const IndexType warpStart, const IndexType inWarpIdx*/ int globalIdx ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ IndexType getStripLength( const IndexType strip ) const; #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void performRowBubbleSortCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void computeColumnSizesCudaKernel( const typename BiEllpackSymmetric< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ); #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ IndexType power( const IndexType number, const IndexType exponent ) const; Loading
src/TNL/Matrices/BiEllpackSymmetric_impl.h +7 −21 Original line number Diff line number Diff line Loading @@ -22,9 +22,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::power( const IndexType number, const IndexType exponent ) const { Loading Loading @@ -127,9 +125,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const { TNL_ASSERT( strip >= 0, Loading @@ -144,9 +140,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const { TNL_ASSERT( row >=0 && row < this->getRows(), Loading Loading @@ -251,9 +245,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::setElementFast( const IndexType row, const IndexType column, const RealType& value ) Loading Loading @@ -311,9 +303,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpackSymmetric< Real, Device, Index, StripSize >::addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading Loading @@ -488,9 +478,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Real BiEllpackSymmetric< Real, Device, Index, StripSize >::getElementFast( const IndexType row, const IndexType column ) const { Loading Loading @@ -584,9 +572,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { Loading
src/TNL/Matrices/BiEllpack_impl.h +9 −23 Original line number Diff line number Diff line Loading @@ -24,9 +24,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::power( const IndexType number, const IndexType exponent ) const { Loading Loading @@ -131,9 +129,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getStripLength( const IndexType strip ) const { TNL_ASSERT( strip >= 0, Loading @@ -148,9 +144,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const IndexType row ) const { TNL_ASSERT( row >=0 && row < this->getRows(), Loading Loading @@ -256,9 +250,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpack< Real, Device, Index, StripSize >::setElementFast( const IndexType row, const IndexType column, const RealType& value ) Loading Loading @@ -316,9 +308,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool BiEllpack< Real, Device, Index, StripSize >::addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading Loading @@ -492,9 +482,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Real BiEllpack< Real, Device, Index, StripSize >::getElementFast( const IndexType row, const IndexType column ) const { Loading Loading @@ -588,9 +576,7 @@ template< typename Real, typename Device, typename Index, int StripSize > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { Loading Loading @@ -1321,7 +1307,7 @@ template< typename Real, typename Device, typename Index, int StripSize > __device__ __cuda_callable__ void BiEllpack< Real, Device, Index, StripSize >::performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType strip ) { Loading Loading @@ -1378,7 +1364,7 @@ template< typename Real, typename Device, typename Index, int StripSize > __device__ __cuda_callable__ void BiEllpack< Real, Device, Index, StripSize >::computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths, const IndexType numberOfStrips, const IndexType strip ) Loading
src/TNL/Matrices/EllpackSymmetric.h +9 −25 Original line number Diff line number Diff line Loading @@ -65,9 +65,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > bool copyFrom( const Matrix& matrix, const CompressedRowLengthsVector& rowLengths );*/ #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setElementFast( const IndexType row, const IndexType column, const RealType& value ); Loading @@ -76,9 +74,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType column, const RealType& value ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addElementFast( const IndexType row, const IndexType column, const RealType& value, Loading @@ -90,9 +86,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool setRowFast( const IndexType row, const IndexType* columnIndexes, const RealType* values, Loading @@ -104,9 +98,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType elements ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ bool addRowFast( const IndexType row, const IndexType* columns, const RealType* values, Loading @@ -119,18 +111,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > const IndexType numberOfElements, const RealType& thisElementMultiplicator = 1.0 ); #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ RealType getElementFast( const IndexType row, const IndexType column ) const; RealType getElement( const IndexType row, const IndexType column ) const; #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ void getRowFast( const IndexType row, IndexType* columns, RealType* values ) const; Loading @@ -140,9 +128,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > RealType* values ) const; template< typename Vector > #ifdef HAVE_CUDA __device__ __host__ #endif __cuda_callable__ typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; Loading Loading @@ -183,9 +169,7 @@ template< typename Vector > template< typename InVector, typename OutVector > #ifdef HAVE_CUDA __device__ #endif __cuda_callable__ void spmvCuda( const InVector& inVector, OutVector& outVector, int rowIdx ) const; Loading