Newer
Older
/***************************************************************************
tnlSlicedSlicedEllpackMatrix_impl.h - description
-------------------
begin : Dec 8, 2013
copyright : (C) 2013 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************/
#ifndef TNLSLICEDELLPACKMATRIX_IMPL_H_
#define TNLSLICEDELLPACKMATRIX_IMPL_H_
#include <matrices/tnlSlicedEllpackMatrix.h>
#include <core/vectors/tnlVector.h>
#include <core/mfuncs.h>
template< typename Real,
typename Device,
typename Index,
int SliceSize >
tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::tnlSlicedEllpackMatrix()
{
};
template< typename Real,
typename Device,
typename Index,
int SliceSize >
tnlString tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getType()
{
return tnlString( "tnlSlicedEllpackMatrix< ") +
tnlString( ", " ) +
Device :: getDeviceType() +
tnlString( " >" );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
tnlString tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getTypeVirtual() const
{
return this->getType();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setDimensions( const IndexType rows,
const IndexType columns )
{
tnlAssert( rows > 0 && columns > 0,
cerr << "rows = " << rows
<< " columns = " << columns << endl );
return tnlSparseMatrix< Real, Device, Index >::setDimensions( rows, columns );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setCompressedRowsLengths( const CompressedRowsLengthsVector& rowLengths )
tnlAssert( this->getRows() > 0, );
tnlAssert( this->getColumns() > 0, );
const IndexType slices = roundUpDivision( this->rows, SliceSize );
if( ! this->sliceCompressedRowsLengths.setSize( slices ) ||
! this->slicePointers.setSize( slices + 1 ) )
return false;
DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths );
this->maxRowLength = rowLengths.max();
this->slicePointers.computeExclusivePrefixSum();
return this->allocateMatrixElements( this->slicePointers.getElement( slices ) );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Index tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRowLength( const IndexType row ) const
{
const IndexType slice = roundUpDivision( row, SliceSize );
return this->sliceCompressedRowsLengths.getElement( slice );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setLike( const tnlSlicedEllpackMatrix< Real2, Device2, Index2, SliceSize >& matrix )
if( !tnlSparseMatrix< Real, Device, Index >::setLike( matrix ) ||
! this->slicePointers.setLike( matrix.slicePointers ) ||
! this->sliceCompressedRowsLengths.setLike( matrix.sliceCompressedRowsLengths ) )
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::reset()
tnlSparseMatrix< Real, Device, Index >::reset();
this->slicePointers.reset();
this->sliceCompressedRowsLengths.reset();
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::operator == ( const tnlSlicedEllpackMatrix< Real2, Device2, Index2 >& matrix ) const
{
tnlAssert( this->getRows() == matrix.getRows() &&
this->getColumns() == matrix.getColumns(),
cerr << "this->getRows() = " << this->getRows()
<< " matrix.getRows() = " << matrix.getRows()
<< " this->getColumns() = " << this->getColumns()
<< " matrix.getColumns() = " << matrix.getColumns()
<< " this->getName() = " << this->getName()
<< " matrix.getName() = " << matrix.getName() );
// TODO: implement this
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Device2,
typename Index2 >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::operator != ( const tnlSlicedEllpackMatrix< Real2, Device2, Index2 >& matrix ) const
{
return ! ( ( *this ) == matrix );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setElementFast( const IndexType row,
const IndexType column,
const Real& value )
{
return this->addElementFast( row, column, value, 0.0 );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setElement( const IndexType row,
const IndexType column,
const Real& value )
return this->addElement( row, column, value, 0.0 );
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElementFast( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator )
tnlAssert( row >= 0 && row < this->rows &&
column >= 0 && column <= this->rows,
cerr << " row = " << row
<< " column = " << column
<< " this->rows = " << this->rows
<< " this->columns = " << this-> columns );
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
IndexType col;
while( elementPtr < rowEnd &&
( col = this->columnIndexes[ elementPtr ] ) < column &&
col != this->getPaddingIndex() ) elementPtr += step;
if( elementPtr == rowEnd )
this->values[ elementPtr ] = thisElementMultiplicator * this->values[ elementPtr ] + value;
if( col == this->getPaddingIndex() )
{
this->columnIndexes[ elementPtr ] = column;
this->values[ elementPtr ] = value;
return true;
}
IndexType j = rowEnd - step;
while( j > elementPtr )
{
this->columnIndexes[ j ] = this->columnIndexes[ j - step ];
this->values[ j ] = this->values[ j - step ];
j -= step;
}
this->columnIndexes[ elementPtr ] = column;
this->values[ elementPtr ] = value;
return true;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElement( const IndexType row,
const IndexType column,
const RealType& value,
const RealType& thisElementMultiplicator )
{
tnlAssert( row >= 0 && row < this->rows &&
column >= 0 && column <= this->rows,
cerr << " row = " << row
<< " column = " << column
<< " this->rows = " << this->rows
<< " this->columns = " << this-> columns );
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step );
IndexType col;
while( elementPtr < rowEnd &&
( col = this->columnIndexes.getElement( elementPtr ) ) < column &&
col != this->getPaddingIndex() ) elementPtr += step;
if( elementPtr == rowEnd )
return false;
{
this->values.setElement( elementPtr, thisElementMultiplicator * this->values.getElement( elementPtr ) + value );
return true;
}
if( col == this->getPaddingIndex() )
{
this->columnIndexes.setElement( elementPtr, column );
this->values.setElement( elementPtr, value );
return true;
}
IndexType j = rowEnd - step;
while( j > elementPtr )
{
this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) );
this->values.setElement( j, this->values.getElement( j - step ) );
j -= step;
}
this->columnIndexes.setElement( elementPtr, column );
this->values.setElement( elementPtr, value );
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRowFast( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements )
{
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowsLengths[ sliceIdx ];
if( elements > rowLength )
return false;
Index elementPointer, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step );
for( IndexType i = 0; i < elements; i++ )
{
const IndexType column = columnIndexes[ i ];
if( column < 0 || column >= this->getColumns() )
return false;
this->columnIndexes[ elementPointer ] = columnIndexes[ i ];
this->values[ elementPointer ] = values[ i ];
for( IndexType i = elements; i < rowLength; i++ )
this->columnIndexes[ elementPointer ] = this->getPaddingIndex();
elementPointer += step;
}
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRow( const IndexType row,
const IndexType* columnIndexes,
const RealType* values,
const IndexType elements )
{
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowsLengths.getElement( sliceIdx );
if( elements > rowLength )
return false;
Index elementPointer, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPointer, rowEnd, step );
for( IndexType i = 0; i < elements; i++ )
{
const IndexType column = columnIndexes[ i ];
if( column < 0 || column >= this->getColumns() )
return false;
this->columnIndexes.setElement( elementPointer, column );
this->values.setElement( elementPointer, values[ i ] );
elementPointer += step;
}
for( IndexType i = elements; i < rowLength; i++ )
{
this->columnIndexes.setElement( elementPointer, this->getPaddingIndex() );
elementPointer += step;
}
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: addRowFast( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator )
{
// TODO: implement
return false;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: addRow( const IndexType row,
const IndexType* columns,
const RealType* values,
const IndexType numberOfElements,
const RealType& thisElementMultiplicator )
{
// TODO: implement
return false;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElementFast( const IndexType row,
const IndexType column ) const
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
IndexType col;
while( elementPtr < rowEnd &&
( col = this->columnIndexes[ elementPtr ] ) < column &&
col != this->getPaddingIndex() )
if( elementPtr < rowEnd && col == column )
return this->values[ elementPtr ];
return 0.0;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElement( const IndexType row,
const IndexType column ) const
{
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step );
IndexType col;
while( elementPtr < rowEnd &&
( col = this->columnIndexes.getElement( elementPtr ) ) < column &&
col != this->getPaddingIndex() )
if( elementPtr < rowEnd && col == column )
return this->values.getElement( elementPtr );
return 0.0;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRowFast( const IndexType row,
IndexType* columns,
RealType* values ) const
Index elementPtr, rowEnd, step, i( 0 );
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd )
{
columns[ i ] = this->columnIndexes[ elementPtr ];
values[ i ] = this->values[ elementPtr ];
elementPtr += step;
i++;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
typename tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::MatrixRow
tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::
getRow( const IndexType rowIndex )
{
Index rowBegin, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, rowIndex, rowBegin, rowEnd, step );
const IndexType slice = roundUpDivision( rowIndex, SliceSize );
return MatrixRow( &this->columnIndexes[ rowBegin ],
this->sliceCompressedRowsLengths[ slice ],
step );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
const typename tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::MatrixRow
tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::
getRow( const IndexType rowIndex ) const
{
Index rowBegin, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, rowIndex, rowBegin, rowEnd, step );
const IndexType slice = roundUpDivision( rowIndex, SliceSize );
return MatrixRow( &this->columnIndexes[ rowBegin ],
this->sliceCompressedRowsLengths[ slice ],
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Vector >
typename Vector::RealType tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::rowVectorProduct( const IndexType row,
const Vector& vector ) const
Real result = 0.0;
Index elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
IndexType column;
while( elementPtr < rowEnd &&
( column = this->columnIndexes[ elementPtr ] ) < this->columns &&
column != this->getPaddingIndex() )
result += this->values[ elementPtr ] * vector[ column ];
elementPtr += step;
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename InVector,
typename OutVector >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const InVector& inVector,
OutVector& outVector ) const
DeviceDependentCode::vectorProduct( *this, inVector, outVector );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Index2 >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addMatrix( const tnlSlicedEllpackMatrix< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator,
const RealType& thisMatrixMultiplicator )
{
tnlAssert( false, cerr << "TODO: implement" );
// TODO: implement
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
template< typename Real2,
typename Index2 >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getTransposition( const tnlSlicedEllpackMatrix< Real2, Device, Index2 >& matrix,
const RealType& matrixMultiplicator )
{
tnlAssert( false, cerr << "TODO: implement" );
// TODO: implement
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::performSORIteration( const Vector& b,
const IndexType row,
Vector& x,
const RealType& omega ) const
tnlAssert( row >=0 && row < this->getRows(),
cerr << "row = " << row
<< " this->getRows() = " << this->getRows()
<< " this->getName() = " << this->getName() << endl );
RealType diagonalValue( 0.0 );
RealType sum( 0.0 );
/*const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowsLengths[ sliceIdx ];
IndexType elementPtr = this->slicePointers[ sliceIdx ] +
rowLength * ( row - sliceIdx * SliceSize );
const IndexType rowEnd( elementPtr + rowLength );*/
IndexType elementPtr, rowEnd, step;
DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step );
while( elementPtr < rowEnd && ( column = this->columnIndexes[ elementPtr ] ) < this->columns )
diagonalValue = this->values[ elementPtr ];
sum += this->values[ elementPtr ] * x[ column ];
elementPtr += step;
}
if( diagonalValue == ( Real ) 0.0 )
{
cerr << "There is zero on the diagonal in " << row << "-th row of the matrix " << this->getName() << ". I cannot perform SOR iteration." << endl;
x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum );
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::save( tnlFile& file ) const
if( ! tnlSparseMatrix< Real, Device, Index >::save( file ) ||
! this->slicePointers.save( file ) ||
! this->sliceCompressedRowsLengths.save( file ) )
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::load( tnlFile& file )
if( ! tnlSparseMatrix< Real, Device, Index >::load( file ) ||
! this->slicePointers.load( file ) ||
! this->sliceCompressedRowsLengths.load( file ) )
return true;
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::save( const tnlString& fileName ) const
{
return tnlObject::save( fileName );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::load( const tnlString& fileName )
{
return tnlObject::load( fileName );
}
template< typename Real,
typename Device,
typename Index,
int SliceSize >
void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::print( ostream& str ) const
{
for( IndexType row = 0; row < this->getRows(); row++ )
{
str <<"Row: " << row << " -> ";
const IndexType sliceIdx = row / SliceSize;
const IndexType rowLength = this->sliceCompressedRowsLengths.getElement( sliceIdx );
IndexType elementPtr = this->slicePointers.getElement( sliceIdx ) +
rowLength * ( row - sliceIdx * SliceSize );
const IndexType rowEnd( elementPtr + rowLength );
while( elementPtr < rowEnd &&
this->columnIndexes.getElement( elementPtr ) < this->columns &&
this->columnIndexes.getElement( elementPtr ) != this->getPaddingIndex() )
const Index column = this->columnIndexes.getElement( elementPtr );
str << " Col:" << column << "->" << this->values.getElement( elementPtr ) << "\t";
elementPtr++;
#ifdef HAVE_CUDA
template< typename Real,
typename Device,
typename Index,
int SliceSize >
__device__ void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( const CompressedRowsLengthsVector& rowLengths,
const IndexType sliceIdx )
{
Index rowIdx = sliceIdx * SliceSize;
Index rowInSliceIdx( 0 );
Index maxRowLength( 0 );
if( rowIdx >= this->getRows() )
return;
while( rowInSliceIdx < SliceSize && rowIdx < this->getRows() )
{
maxRowLength = Max( maxRowLength, rowLengths[ rowIdx ] );
rowIdx++;
rowInSliceIdx++;
}
this->sliceCompressedRowsLengths[ sliceIdx ] = maxRowLength;
this->slicePointers[ sliceIdx ] = maxRowLength * SliceSize;
if( threadIdx.x == 0 )
this->slicePointers[ this->slicePointers.getSize() - 1 ] = 0;
}
#endif
template<>
class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost >
{
public:
typedef tnlHost Device;
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers.getElement( sliceIdx );
const Index rowLength = matrix.sliceCompressedRowsLengths.getElement( sliceIdx );
rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize );
rowEnd = rowBegin + rowLength;
step = 1;
}
template< typename Real,
typename Index,
int SliceSize >
__cuda_callable__
static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers[ sliceIdx ];
const Index rowLength = matrix.sliceCompressedRowsLengths[ sliceIdx ];
rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize );
rowEnd = rowBegin + rowLength;
step = 1;
template< typename Real,
typename Index,
int SliceSize >
static bool computeMaximalRowLengthInSlices( tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const typename tnlSlicedEllpackMatrix< Real, Device, Index >::CompressedRowsLengthsVector& rowLengths )
{
Index row( 0 ), slice( 0 ), sliceRowLength( 0 );
while( row < matrix.getRows() )
{
sliceRowLength = Max( rowLengths.getElement( row++ ), sliceRowLength );
if( row % SliceSize == 0 )
{
matrix.sliceCompressedRowsLengths.setElement( slice, sliceRowLength );
matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize );
sliceRowLength = 0;
}
}
if( row % SliceSize != 0 )
{
matrix.sliceCompressedRowsLengths.setElement( slice, sliceRowLength );
matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize );
}
matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 );
template< typename Real,
typename Index,
typename InVector,
typename OutVector,
int SliceSize >
static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const InVector& inVector,
OutVector& outVector )
#ifdef HAVE_OPENMP
#pragma omp parallel for
#endif
for( Index row = 0; row < matrix.getRows(); row ++ )
outVector[ row ] = matrix.rowVectorProduct( row, inVector );
}
};
#ifdef HAVE_CUDA
template< typename Real,
typename Index,
int SliceSize >
__global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel( tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >* matrix,
const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::CompressedRowsLengthsVector* rowLengths,
const Index sliceIdx = gridIdx * tnlCuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
matrix->computeMaximalRowLengthInSlicesCuda( *rowLengths, sliceIdx );
}
#endif
template<>
class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
{
public:
typedef tnlCuda Device;
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers.getElement( sliceIdx );
const Index rowLength = matrix.sliceCompressedRowsLengths.getElement( sliceIdx );
rowBegin = slicePointer + row - sliceIdx * SliceSize;
rowEnd = rowBegin + rowLength * SliceSize;
step = SliceSize;
}
template< typename Real,
typename Index,
int SliceSize >
static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const Index row,
Index& rowBegin,
Index& rowEnd,
Index& step )
const Index sliceIdx = row / SliceSize;
const Index slicePointer = matrix.slicePointers[ sliceIdx ];
const Index rowLength = matrix.sliceCompressedRowsLengths[ sliceIdx ];
rowBegin = slicePointer + row - sliceIdx * SliceSize;
rowEnd = rowBegin + rowLength * SliceSize;
step = SliceSize;
}
template< typename Real,
typename Index,
int SliceSize >
static bool computeMaximalRowLengthInSlices( tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const typename tnlSlicedEllpackMatrix< Real, Device, Index >::CompressedRowsLengthsVector& rowLengths )
{
#ifdef HAVE_CUDA
typedef tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > Matrix;
typedef typename Matrix::CompressedRowsLengthsVector CompressedRowsLengthsVector;
Matrix* kernel_matrix = tnlCuda::passToDevice( matrix );
CompressedRowsLengthsVector* kernel_rowLengths = tnlCuda::passToDevice( rowLengths );
const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize );
dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
const Index cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
{
if( gridIdx == cudaGrids - 1 )
cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize();
tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>>
( kernel_matrix,
kernel_rowLengths,
gridIdx );
}
tnlCuda::freeFromDevice( kernel_matrix );
tnlCuda::freeFromDevice( kernel_rowLengths );
checkCudaDevice;
#endif
template< typename Real,
typename Index,
typename InVector,
typename OutVector,
int SliceSize >
static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix,
const InVector& inVector,
OutVector& outVector )
{
tnlMatrixVectorProductCuda( matrix, inVector, outVector );
}
#endif /* TNLSLICEDELLPACKMATRIX_IMPL_H_ */