Newer
Older
/***************************************************************************
-------------------
begin : Dec 8, 2013
copyright : (C) 2013 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include <TNL/Math.h>
#include <TNL/Exceptions/NotImplementedError.h>
template< typename Real,
typename Device,
typename Index,
int SliceSize >
SlicedEllpack< Real, Device, Index, SliceSize >::SlicedEllpack()
{
};
template< typename Real,
typename Device,
typename Index,
int SliceSize >
String SlicedEllpack< Real, Device, Index, SliceSize >::getType()
return String( "Matrices::SlicedEllpack< ") +
String( TNL::getType< Real >() ) +
String( ", " ) +
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
String SlicedEllpack< Real, Device, Index, SliceSize >::getTypeVirtual() const
{
return this->getType();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType()
{
return getType();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationTypeVirtual() const
{
return this->getSerializationType();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Jakub Klinkovský
committed
void SlicedEllpack< Real, Device, Index, SliceSize >::setDimensions( const IndexType rows,
const IndexType columns )
std::cerr << "rows = " << rows
<< " columns = " << columns << std::endl );
Jakub Klinkovský
committed
Sparse< Real, Device, Index >::setDimensions( rows, columns );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" );
TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
const IndexType slices = roundUpDivision( this->rows, SliceSize );
Jakub Klinkovský
committed
this->sliceCompressedRowLengths.setSize( slices );
this->slicePointers.setSize( slices + 1 );
DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths );
this->maxRowLength = rowLengths.max();
this->slicePointers.computeExclusivePrefixSum();
Jakub Klinkovský
committed
this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLength( const IndexType row ) const
const IndexType slice = row / SliceSize;
return this->sliceCompressedRowLengths.getElement( slice );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
__cuda_callable__
Index SlicedEllpack< Real, Device, Index, SliceSize >::getRowLengthFast( const IndexType row ) const
{
const IndexType slice = row / SliceSize;
return this->sliceCompressedRowLengths[ slice ];
}
template< typename Real,
typename Device,
typename Index ,
int SliceSize >
Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const
{
ConstMatrixRow matrixRow = getRow( row );
Lukas Cejka
committed
return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
Jakub Klinkovský
committed
void SlicedEllpack< Real, Device, Index, SliceSize >::setLike( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
Jakub Klinkovský
committed
Sparse< Real, Device, Index >::setLike( matrix );
this->slicePointers.setLike( matrix.slicePointers );
this->sliceCompressedRowLengths.setLike( matrix.sliceCompressedRowLengths );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::reset()
this->slicePointers.reset();
this->sliceCompressedRowLengths.reset();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
bool SlicedEllpack< Real, Device, Index, SliceSize >::operator == ( const SlicedEllpack< Real2, Device2, Index2 >& matrix ) const
TNL_ASSERT( this->getRows() == matrix.getRows() &&
this->getColumns() == matrix.getColumns(),
std::cerr << "this->getRows() = " << this->getRows()
<< " matrix.getRows() = " << matrix.getRows()
<< " this->getColumns() = " << this->getColumns()
<< " matrix.getColumns() = " << matrix.getColumns() );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
bool SlicedEllpack< Real, Device, Index, SliceSize >::operator != ( const SlicedEllpack< Real2, Device2, Index2 >& matrix ) const
{
return ! ( ( *this ) == matrix );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize >::setElementFast( const IndexType row,
const IndexType column,
const Real& value )
{
return this->addElementFast( row, column, value, 0.0 );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize >::setElement( const IndexType row,
const IndexType column,
const Real& value )
return this->addElement( row, column, value, 0.0 );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize >::addElementFast( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator )
TNL_ASSERT( row >= 0 && row < this->rows &&
Lukas Cejka
committed
column >= 0 && column <= this->columns,
<< " column = " << column
<< " this->rows = " << this->rows
<< " this->columns = " << this-> columns );
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd &&
( col = this->columnIndexes[ elementPtr ] ) < column &&
col != this->getPaddingIndex() ) elementPtr += step;
if( elementPtr == rowEnd )
this->values[ elementPtr ] = thisElementMultiplicator * this->values[ elementPtr ] + value;
if( col == this->getPaddingIndex() )
{
this->columnIndexes[ elementPtr ] = column;
this->values[ elementPtr ] = value;
return true;
}
IndexType j = rowEnd - step;
while( j > elementPtr )
{
this->columnIndexes[ j ] = this->columnIndexes[ j - step ];
this->values[ j ] = this->values[ j - step ];
j -= step;
}
this->columnIndexes[ elementPtr ] = column;
this->values[ elementPtr ] = value;
return true;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize >::addElement( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator )
{
TNL_ASSERT( row >= 0 && row < this->rows &&
Lukas Cejka
committed
column >= 0 && column <= this->columns,
<< " column = " << column
<< " this->rows = " << this->rows
<< " this->columns = " << this-> columns );
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd &&
( col = this->columnIndexes.getElement( elementPtr ) ) < column &&
col != this->getPaddingIndex() ) elementPtr += step;
if( elementPtr == rowEnd )
return false;
{
this->values.setElement( elementPtr, thisElementMultiplicator * this->values.getElement( elementPtr ) + value );
return true;
}
if( col == this->getPaddingIndex() )
{
this->columnIndexes.setElement( elementPtr, column );
this->values.setElement( elementPtr, value );
return true;
}
IndexType j = rowEnd - step;
while( j > elementPtr )
{
this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) );
this->values.setElement( j, this->values.getElement( j - step ) );
j -= step;
}
this->columnIndexes.setElement( elementPtr, column );
this->values.setElement( elementPtr, value );
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize > :: setRowFast( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements )
{
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowLengths[ sliceIdx ];
if( elements > rowLength )
return false;
Index elementPointer, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step );
for( IndexType i = 0; i < elements; i++ )
{
const IndexType column = columnIndexes[ i ];
if( column < 0 || column >= this->getColumns() )
return false;
this->columnIndexes[ elementPointer ] = columnIndexes[ i ];
this->values[ elementPointer ] = values[ i ];
for( IndexType i = elements; i < rowLength; i++ )
this->columnIndexes[ elementPointer ] = this->getPaddingIndex();
elementPointer += step;
}
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize > :: setRow( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements )
{
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowLengths.getElement( sliceIdx );
if( elements > rowLength )
return false;
Index elementPointer, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPointer, rowEnd, step );
for( IndexType i = 0; i < elements; i++ )
{
const IndexType column = columnIndexes[ i ];
if( column < 0 || column >= this->getColumns() )
return false;
this->columnIndexes.setElement( elementPointer, column );
this->values.setElement( elementPointer, values[ i ] );
elementPointer += step;
}
for( IndexType i = elements; i < rowLength; i++ )
{
this->columnIndexes.setElement( elementPointer, this->getPaddingIndex() );
elementPointer += step;
}
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize > :: addRowFast( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator )
{
// TODO: implement
return false;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool SlicedEllpack< Real, Device, Index, SliceSize > :: addRow( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator )
{
// TODO: implement
return false;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Real SlicedEllpack< Real, Device, Index, SliceSize >::getElementFast( const IndexType row,
const IndexType column ) const
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd &&
( col = this->columnIndexes[ elementPtr ] ) < column &&
col != this->getPaddingIndex() )
if( elementPtr < rowEnd && col == column )
return this->values[ elementPtr ];
return 0.0;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Real SlicedEllpack< Real, Device, Index, SliceSize >::getElement( const IndexType row,
const IndexType column ) const
{
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd &&
( col = this->columnIndexes.getElement( elementPtr ) ) < column &&
col != this->getPaddingIndex() )
if( elementPtr < rowEnd && col == column )
return this->values.getElement( elementPtr );
return 0.0;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::getRowFast( const IndexType row,
IndexType* columns,
RealType* values ) const
Index elementPtr, rowEnd, step, i( 0 );
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd )
{
columns[ i ] = this->columnIndexes[ elementPtr ];
values[ i ] = this->values[ elementPtr ];
elementPtr += step;
i++;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
typename SlicedEllpack< Real, Device, Index, SliceSize >::MatrixRow
SlicedEllpack< Real, Device, Index, SliceSize >::
getRow( const IndexType rowIndex )
{
Index rowBegin, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, rowIndex, rowBegin, rowEnd, step );
const IndexType slice = rowIndex / SliceSize;
return MatrixRow( &this->columnIndexes[ rowBegin ],
this->sliceCompressedRowLengths[ slice ],
step );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
typename SlicedEllpack< Real, Device, Index, SliceSize >::ConstMatrixRow
SlicedEllpack< Real, Device, Index, SliceSize >::
getRow( const IndexType rowIndex ) const
{
Index rowBegin, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, rowIndex, rowBegin, rowEnd, step );
const IndexType slice = rowIndex / SliceSize;
return ConstMatrixRow( &this->columnIndexes[ rowBegin ],
&this->values[ rowBegin ],
this->sliceCompressedRowLengths[ slice ],
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Vector >
typename Vector::RealType SlicedEllpack< Real, Device, Index, SliceSize >::rowVectorProduct( const IndexType row,
const Vector& vector ) const
Real result = 0.0;
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
IndexType column;
while( elementPtr < rowEnd &&
( column = this->columnIndexes[ elementPtr ] ) < this->columns &&
column != this->getPaddingIndex() )
result += this->values[ elementPtr ] * vector[ column ];
elementPtr += step;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename InVector,
typename OutVector >
void SlicedEllpack< Real, Device, Index, SliceSize >::vectorProduct( const InVector& inVector,
Jakub Klinkovský
committed
OutVector& outVector,
RealType multiplicator ) const
Jakub Klinkovský
committed
DeviceDependentCode::vectorProduct( *this, inVector, outVector, multiplicator );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Index2 >
void SlicedEllpack< Real, Device, Index, SliceSize >::addMatrix( const SlicedEllpack< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator,
const RealType& thisMatrixMultiplicator )
throw Exceptions::NotImplementedError( "SlicedEllpack::addMatrix is not implemented." );
// TODO: implement
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Index2 >
void SlicedEllpack< Real, Device, Index, SliceSize >::getTransposition( const SlicedEllpack< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator )
{
throw Exceptions::NotImplementedError( "SlicedEllpack::getTransposition is not implemented." );
// TODO: implement
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Vector1, typename Vector2 >
bool SlicedEllpack< Real, Device, Index, SliceSize >::performSORIteration( const Vector1& b,
const IndexType row,
Vector2& x,
const RealType& omega ) const
TNL_ASSERT( row >=0 && row < this->getRows(),
std::cerr << "row = " << row
<< " this->getRows() = " << this->getRows() << std::endl );
RealType diagonalValue( 0.0 );
RealType sum( 0.0 );
/*const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowLengths[ sliceIdx ];
IndexType elementPtr = this->slicePointers[ sliceIdx ] +
rowLength * ( row - sliceIdx * SliceSize );
const IndexType rowEnd( elementPtr + rowLength );*/
IndexType elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd && ( column = this->columnIndexes[ elementPtr ] ) < this->columns )
diagonalValue = this->values[ elementPtr ];
sum += this->values[ elementPtr ] * x[ column ];
elementPtr += step;
}
if( diagonalValue == ( Real ) 0.0 )
{
std::cerr << "There is zero on the diagonal in " << row << "-th row of a matrix. I cannot perform SOR iteration." << std::endl;
x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
// copy assignment
template< typename Real,
typename Device,
typename Index,
int SliceSize >
SlicedEllpack< Real, Device, Index, SliceSize >&
SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack& matrix )
{
this->setLike( matrix );
this->values = matrix.values;
this->columnIndexes = matrix.columnIndexes;
this->slicePointers = matrix.slicePointers;
this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths;
return *this;
}
// cross-device copy assignment
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2, typename Device2, typename Index2, typename >
SlicedEllpack< Real, Device, Index, SliceSize >&
SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix )
{
static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value || std::is_same< Device, Devices::MIC >::value,
static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value || std::is_same< Device2, Devices::MIC >::value,
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
"unknown device" );
this->setLike( matrix );
this->slicePointers = matrix.slicePointers;
this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths;
// host -> cuda
if( std::is_same< Device, Devices::Cuda >::value ) {
typename ValuesVector::HostType tmpValues;
typename ColumnIndexesVector::HostType tmpColumnIndexes;
tmpValues.setLike( matrix.values );
tmpColumnIndexes.setLike( matrix.columnIndexes );
#ifdef HAVE_OPENMP
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
#endif
for( Index sliceIdx = 0; sliceIdx < matrix.sliceCompressedRowLengths.getSize(); sliceIdx++ ) {
const Index rowLength = matrix.sliceCompressedRowLengths[ sliceIdx ];
const Index offset = matrix.slicePointers[ sliceIdx ];
for( Index j = 0; j < rowLength; j++ )
for( Index i = 0; i < SliceSize; i++ ) {
tmpValues[ offset + j * SliceSize + i ] = matrix.values[ offset + i * rowLength + j ];
tmpColumnIndexes[ offset + j * SliceSize + i ] = matrix.columnIndexes[ offset + i * rowLength + j ];
}
}
this->values = tmpValues;
this->columnIndexes = tmpColumnIndexes;
}
// cuda -> host
if( std::is_same< Device, Devices::Host >::value ) {
ValuesVector tmpValues;
ColumnIndexesVector tmpColumnIndexes;
tmpValues.setLike( matrix.values );
tmpColumnIndexes.setLike( matrix.columnIndexes );
tmpValues = matrix.values;
tmpColumnIndexes = matrix.columnIndexes;
#ifdef HAVE_OPENMP
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
#endif
for( Index sliceIdx = 0; sliceIdx < sliceCompressedRowLengths.getSize(); sliceIdx++ ) {
const Index rowLength = sliceCompressedRowLengths[ sliceIdx ];
const Index offset = slicePointers[ sliceIdx ];
for( Index i = 0; i < SliceSize; i++ )
for( Index j = 0; j < rowLength; j++ ) {
this->values[ offset + i * rowLength + j ] = tmpValues[ offset + j * SliceSize + i ];
this->columnIndexes[ offset + i * rowLength + j ] = tmpColumnIndexes[ offset + j * SliceSize + i ];
}
}
}
if( std::is_same< Device, Devices::MIC >::value ) {
throw Exceptions::NotImplementedError("Cross-device assignment for the SlicedEllpack format is not implemented for MIC.");
return *this;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::save( File& file ) const
Sparse< Real, Device, Index >::save( file );
Jakub Klinkovský
committed
file << this->slicePointers << this->sliceCompressedRowLengths;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::load( File& file )
Sparse< Real, Device, Index >::load( file );
Jakub Klinkovský
committed
file >> this->slicePointers >> this->sliceCompressedRowLengths;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::save( const String& fileName ) const
Object::save( fileName );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::load( const String& fileName )
Object::load( fileName );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) const
if( std::is_same< Device, Devices::Host >::value ) {
for( IndexType row = 0; row < this->getRows(); row++ )
str <<"Row: " << row << " -> ";
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowLengths.getElement( sliceIdx );
IndexType elementPtr = this->slicePointers.getElement( sliceIdx ) +
rowLength * ( row - sliceIdx * SliceSize );
const IndexType rowEnd( elementPtr + rowLength );
while( elementPtr < rowEnd &&
this->columnIndexes.getElement( elementPtr ) < this->columns &&
this->columnIndexes.getElement( elementPtr ) != this->getPaddingIndex() )
{
const Index column = this->columnIndexes.getElement( elementPtr );
str << " Col:" << column << "->" << this->values.getElement( elementPtr ) << "\t";
elementPtr++;
}
str << std::endl;
}
else {
HostType hostMatrix;
hostMatrix = *this;
hostMatrix.print( str );
#ifdef HAVE_CUDA
template< typename Real,
typename Device,
typename Index,
int SliceSize >
__device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths,
const IndexType sliceIdx )
{
Index rowIdx = sliceIdx * SliceSize;
Index rowInSliceIdx( 0 );
Index maxRowLength( 0 );
if( rowIdx >= this->getRows() )
return;
while( rowInSliceIdx < SliceSize && rowIdx < this->getRows() )
{
maxRowLength = max( maxRowLength, rowLengths[ rowIdx ] );
this->sliceCompressedRowLengths[ sliceIdx ] = maxRowLength;
this->slicePointers[ sliceIdx ] = maxRowLength * SliceSize;
if( threadIdx.x == 0 )
this->slicePointers[ this->slicePointers.getSize() - 1 ] = 0;
}
#endif
class SlicedEllpackDeviceDependentCode< Devices::Host >
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers.getElement( sliceIdx );
const Index rowLength = matrix.sliceCompressedRowLengths.getElement( sliceIdx );
rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize );
rowEnd = rowBegin + rowLength;
step = 1;
}
template< typename Real,
typename Index,
int SliceSize >
__cuda_callable__
static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers[ sliceIdx ];
const Index rowLength = matrix.sliceCompressedRowLengths[ sliceIdx ];
rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize );
rowEnd = rowBegin + rowLength;
step = 1;
template< typename Real,
typename Index,
int SliceSize >
static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths )
{
Index row( 0 ), slice( 0 ), sliceRowLength( 0 );
while( row < matrix.getRows() )
{
sliceRowLength = max( rowLengths.getElement( row++ ), sliceRowLength );
if( row % SliceSize == 0 )
{
matrix.sliceCompressedRowLengths.setElement( slice, sliceRowLength );
matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize );
sliceRowLength = 0;
}
}
if( row % SliceSize != 0 )
{
matrix.sliceCompressedRowLengths.setElement( slice, sliceRowLength );
matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize );
}
matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 );
template< typename Real,
typename Index,
typename InVector,
typename OutVector,
static void vectorProduct( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
Jakub Klinkovský
committed
OutVector& outVector,
Real multiplicator )
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
for( Index row = 0; row < matrix.getRows(); row ++ )
Jakub Klinkovský
committed
outVector[ row ] = matrix.rowVectorProduct( row, inVector ) * multiplicator;
};
#ifdef HAVE_CUDA
template< typename Real,
typename Index,
int SliceSize >
__global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix,
typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths,
int gridIdx )
const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx );
typename Real,
typename Index,
int SliceSize >
__global__ void SlicedEllpackVectorProductCudaKernel(
const Index rows,
const Index columns,
const Index* slicePointers,
const Index* sliceCompressedRowLengths,
const Index paddingIndex,
const Index* columnIndexes,
const Real* values,
const Real* inVector,
Real* outVector,
Jakub Klinkovský
committed
Real multiplicator,
const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx >= rows )
return;
const Index sliceIdx = rowIdx / SliceSize;
const Index slicePointer = slicePointers[ sliceIdx ];
const Index rowLength = sliceCompressedRowLengths[ sliceIdx ];
Index i = slicePointer + rowIdx - sliceIdx * SliceSize;
const Index rowEnd = i + rowLength * SliceSize;
Real result( 0.0 );
Index columnIndex;
while( i < rowEnd &&
( columnIndex = columnIndexes[ i ] ) < columns &&
columnIndex < paddingIndex )
{
result += values[ i ] * inVector[ columnIndex ];
i += SliceSize;
}
Jakub Klinkovský
committed
outVector[ rowIdx ] = result * multiplicator;
class SlicedEllpackDeviceDependentCode< Devices::Cuda >
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers.getElement( sliceIdx );
const Index rowLength = matrix.sliceCompressedRowLengths.getElement( sliceIdx );
rowBegin = slicePointer + row - sliceIdx * SliceSize;
rowEnd = rowBegin + rowLength * SliceSize;
step = SliceSize;
}
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers[ sliceIdx ];
const Index rowLength = matrix.sliceCompressedRowLengths[ sliceIdx ];
rowBegin = slicePointer + row - sliceIdx * SliceSize;
rowEnd = rowBegin + rowLength * SliceSize;
step = SliceSize;
}
template< typename Real,
typename Index,
int SliceSize >
static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix,
typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths )
typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix;
typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector;
Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix );