/***************************************************************************
-------------------
begin : Dec 8, 2013
copyright : (C) 2013 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
/***
* Authors:
* Oberhuber Tomas, tomas.oberhuber@fjfi.cvut.cz
* Vacata Jan
* The algorithm/method was published in:
* Oberhuber T., Suzuki A., Vacata J., New Row-grouped CSR format for storing
* the sparse matrices on GPU with implementation in CUDA, Acta Technica, 2011,
* vol. 56, no. 4, pp. 447-466.
*/
// NOTE(review): the forward declarations below look truncated in this copy of
// the header: the first template header is not followed by any declaration,
// and the second parameter list starts mid-way (no opening `template<` line
// and no trailing `class ...;`). Restore them from the original file before
// compiling.
template< typename Device >
typename Index = int,
int SliceSize = 32 >
#ifdef HAVE_CUDA
/***
 * Forward declaration of a CUDA kernel template; it is only declared when
 * HAVE_CUDA is defined.
 *
 * Parameters:
 *   matrix     - pointer to a SlicedEllpack instantiated for Devices::Cuda
 *   rowLengths - view of the compressed row lengths, passed by value
 *   gridIdx    - NOTE(review): presumably the index of the current grid when
 *                the launch is split into several grids - confirm against the
 *                kernel definition.
 *
 * NOTE(review): the expected grid/block layout and any shared-memory
 * requirement are not visible from this declaration; see the kernel
 * definition.
 */
template< typename Real,
typename Index,
int SliceSize >
__global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix,
typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths,
int gridIdx );
#endif
template< typename Real,
typename Device,
typename Index,
int SliceSize >
class SlicedEllpack : public Sparse< Real, Device, Index >
private:
// Helper alias for the cross-device assignment operator:
// Enabler< Device2 >::type exists only when Device2 differs from Device, so
// using it as a defaulted template parameter removes that operator from
// overload resolution for same-device arguments.
template< typename Device2 >
using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
// Every SlicedEllpack instantiation is a friend of every other one; the
// templated assignment operators need this to reach the non-public members of
// a matrix with different template arguments.
template< typename Real2, typename Device2, typename Index2, int SliceSize2 >
friend class SlicedEllpack;
public:
// Basic types taken directly from the template parameters.
using RealType = Real;
using DeviceType = Device;
using IndexType = Index;

// Container and view types re-exported from the Sparse base class.
using CompressedRowLengthsVector = typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector;
using ConstCompressedRowLengthsVectorView = typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView;
using ValuesVector = typename Sparse< RealType, DeviceType, IndexType >::ValuesVector;
using ColumnIndexesVector = typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector;

// This matrix type bound to the host and to the CUDA device, respectively.
using HostType = SlicedEllpack< Real, Devices::Host, Index, SliceSize >;
using CudaType = SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >;

// Read-only row type; it is the return type of the const getRow() overload.
using ConstMatrixRow = SparseRow< const RealType, const IndexType >;
// Type-name queries returning a String. The static variants can be called
// without an instance.
// NOTE(review): the *Virtual variants presumably forward to the static ones so
// that the name can be obtained through a base-class reference - confirm in
// the implementation file.
static String getType();
String getTypeVirtual() const;
// NOTE(review): presumably the type string that save()/load() write to and
// check in files - confirm.
static String getSerializationType();
virtual String getSerializationTypeVirtual() const;
Jakub Klinkovský
committed
void setDimensions( const IndexType rows,
// Takes a view of per-row lengths, passed by value.
// NOTE(review): presumably this also sizes the slice storage of the matrix -
// confirm in the implementation file.
void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths );
// Row-length queries for a single row. Of the three, only getRowLengthFast is
// marked __cuda_callable__.
IndexType getRowLength( const IndexType row ) const;
__cuda_callable__
IndexType getRowLengthFast( const IndexType row ) const;
// NOTE(review): presumably counts only the entries of `row` that hold a
// non-zero value - confirm.
IndexType getNonZeroRowLength( const IndexType row ) const;
template< typename Real2, typename Device2, typename Index2 >
Jakub Klinkovský
committed
void setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
// NOTE(review): presumably releases the stored data and returns the matrix to
// an empty state - confirm.
void reset();
// Comparison with a matrix that may use different Real, Device and Index
// types.
// NOTE(review): unlike setLike() and the templated operator=, the argument
// type here does not pass SliceSize, so it falls back to the default slice
// size of the primary template. A matrix with a non-default SliceSize could
// then not be compared even with itself - confirm whether this is intended;
// changing it also requires updating the out-of-class definitions.
template< typename Real2, typename Device2, typename Index2 >
bool operator == ( const SlicedEllpack< Real2, Device2, Index2 >& matrix ) const;
template< typename Real2, typename Device2, typename Index2 >
bool operator != ( const SlicedEllpack< Real2, Device2, Index2 >& matrix ) const;
// Write access to a single element addressed by (row, column). All four
// functions are declared to return bool.
// NOTE(review): presumably `false` means the element could not be stored in
// the space allocated for the row - confirm in the implementation file.
// NOTE(review): getRowLengthFast() is marked __cuda_callable__, but no such
// qualifier is visible on the *Fast functions here - confirm whether it was
// lost in this copy of the header.
bool setElementFast( const IndexType row,
const IndexType column,
const RealType& value );
bool setElement( const IndexType row,
const IndexType column,
const RealType& value );
// thisElementMultiplicator defaults to 1.0.
// NOTE(review): presumably the stored element is scaled by
// thisElementMultiplicator before `value` is added - confirm.
bool addElementFast( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator = 1.0 );
bool addElement( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator = 1.0 );
// Write access to a whole row. The column indexes and values are passed as
// two raw pointers together with an element count; all four functions are
// declared to return bool.
// NOTE(review): presumably both arrays must hold at least `elements`
// (respectively `numberOfElements`) entries - confirm in the implementation.
bool setRowFast( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements );
bool setRow( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements );
// thisElementMultiplicator defaults to 1.0.
// NOTE(review): presumably the existing row entries are scaled by
// thisElementMultiplicator before the given values are added - confirm.
bool addRowFast( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator = 1.0 );
bool addRow( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator = 1.0 );
// Read access to a single element addressed by (row, column); the value is
// returned by value.
RealType getElementFast( const IndexType row,
const IndexType column ) const;
RealType getElement( const IndexType row,
const IndexType column ) const;
// Row read-out into caller-provided arrays.
// NOTE(review): no capacity is passed, so the caller presumably has to size
// `columns` and `values` for the whole row - confirm.
void getRowFast( const IndexType row,
IndexType* columns,
RealType* values ) const;
// Row accessors; the const overload returns ConstMatrixRow.
// NOTE(review): MatrixRow is not declared in the visible part of this class
// (only ConstMatrixRow is) - confirm it comes from the base class or restore
// the missing typedef.
MatrixRow getRow( const IndexType rowIndex );
ConstMatrixRow getRow( const IndexType rowIndex ) const;
// Takes a row index and a vector and returns a value of the vector's own
// RealType (not the matrix RealType).
// NOTE(review): presumably the dot product of the given matrix row with
// `vector` - confirm.
template< typename Vector >
typename Vector::RealType rowVectorProduct( const IndexType row,
const Vector& vector ) const;
template< typename InVector,
typename OutVector >
void vectorProduct( const InVector& inVector,
Jakub Klinkovský
committed
OutVector& outVector,
RealType multiplicator = 1.0 ) const;
// Both functions take a matrix on the same Device whose Real and Index types
// may differ; the multiplicators default to 1.0.
// NOTE(review): the argument type does not pass SliceSize, so it falls back
// to the default slice size of the primary template rather than to this
// matrix's SliceSize - confirm whether this is intended; changing it also
// requires updating the out-of-class definitions.
template< typename Real2, typename Index2 >
void addMatrix( const SlicedEllpack< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator = 1.0,
const RealType& thisMatrixMultiplicator = 1.0 );
template< typename Real2, typename Index2 >
void getTransposition( const SlicedEllpack< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator = 1.0 );
// `omega` defaults to 1.0.
// NOTE(review): Vector2 does not appear in the visible parameter list and so
// cannot be deduced at a call site; parameter lines are probably missing from
// this copy of the header - restore them from the original file.
template< typename Vector1, typename Vector2 >
bool performSORIteration( const Vector1& b,
const RealType& omega = 1.0 ) const;
// Copy assignment from a matrix of exactly the same type.
SlicedEllpack& operator=( const SlicedEllpack& matrix );
// Assignment from a matrix on a different device. Real and Index may differ
// as well, but SliceSize must match. The defaulted last template parameter
// makes this overload unavailable when Device2 is the same as Device.
template< typename Real2, typename Device2, typename Index2,
typename = typename Enabler< Device2 >::type >
SlicedEllpack& operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix );
// Serialization to / from an already opened File object.
// NOTE(review): how failures are reported (the functions return void) is not
// visible here - confirm in the implementation file.
void save( File& file ) const;
void load( File& file );
// Overloads taking a file name instead of a File object.
void save( const String& fileName ) const;
void load( const String& fileName );
// Two vectors of Index values allocated on Device.
// NOTE(review): presumably slicePointers holds the offset of each slice in
// the value/column arrays and sliceCompressedRowLengths the row length used
// within each slice - confirm in the implementation file.
Containers::Vector< Index, Device, Index > slicePointers, sliceCompressedRowLengths;
// Device-specific helper class, selected by DeviceType; it is a friend of
// this class.
typedef SlicedEllpackDeviceDependentCode< DeviceType > DeviceDependentCode;
friend class SlicedEllpackDeviceDependentCode< DeviceType >;
/*friend __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize >( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix,
typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths,
int gridIdx );
*/
// TODO: The friend declaration above does not work because of the __global__ specifier, so the following method is declared public instead. Fix this when possible.
__device__ void computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths,