Newer
Older
Tomáš Oberhuber
committed
/***************************************************************************
                          tnlMatrix_impl.h  -  description
                             -------------------
    begin                : Dec 18, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
Tomáš Oberhuber
committed
Tomáš Oberhuber
committed
#include <TNL/Assert.h>
/**
 * Default constructor.
 *
 * Initializes both stored dimensions (rows and columns) to zero.
 */
template< typename Real, typename Device, typename Index >
tnlMatrix< Real, Device, Index >::tnlMatrix()
   : rows( 0 ), columns( 0 )
{
}
/**
 * Stores the requested dimensions in this matrix object.
 *
 * The Assert macro is given the condition that both values are strictly
 * positive, together with an expression that writes the two values to
 * std::cerr (presumably evaluated when the check fails).
 *
 * @param rows     number of rows to store
 * @param columns  number of columns to store
 * @return always true
 */
template< typename Real, typename Device, typename Index >
bool
tnlMatrix< Real, Device, Index >::setDimensions( const IndexType rows, const IndexType columns )
{
   Assert( rows > 0 && columns > 0,
           std::cerr << " rows = " << rows << " columns = " << columns );

   // The parameters shadow the data members of the same name, hence this->.
   this->columns = columns;
   this->rows = rows;
   return true;
}
/**
 * Collects the length of every matrix row into a vector.
 *
 * The output vector is resized to the number of rows and element i is set to
 * the value returned by getRowLength( i ).
 *
 * @param rowLengths  output vector; its previous size is replaced
 */
template< typename Real,
          typename Device,
          typename Index >
void tnlMatrix< Real, Device, Index >::getCompressedRowsLengths( Vectors::Vector< IndexType, DeviceType, IndexType >& rowLengths ) const
{
   // The row count does not change inside this const method, so read it once.
   const IndexType rowsCount = this->getRows();
   rowLengths.setSize( rowsCount );
   for( IndexType row = 0; row < rowsCount; row++ )
      rowLengths.setElement( row, this->getRowLength( row ) );
}
/**
 * Gives this matrix the same dimensions as another matrix, which may use
 * different Real, Device and Index types.
 *
 * @param matrix  matrix whose row and column counts are taken over
 * @return the result of setDimensions() called with those counts
 */
template< typename Real, typename Device, typename Index >
   template< typename Real2, typename Device2, typename Index2 >
bool
tnlMatrix< Real, Device, Index >::setLike( const tnlMatrix< Real2, Device2, Index2 >& matrix )
{
   const bool dimensionsSet = this->setDimensions( matrix.getRows(), matrix.getColumns() );
   return dimensionsSet;
}
/**
 * @return the stored number of matrix rows
 */
template< typename Real, typename Device, typename Index >
Index
tnlMatrix< Real, Device, Index >::getRows() const
{
   return rows;
}
/**
 * @return the stored number of matrix columns
 */
template< typename Real, typename Device, typename Index >
Index
tnlMatrix< Real, Device, Index >::getColumns() const
{
   return columns;
}
/**
 * Sets both stored dimensions back to zero, the same values the default
 * constructor uses.
 */
template< typename Real, typename Device, typename Index >
void
tnlMatrix< Real, Device, Index >::reset()
{
   columns = 0;
   rows = 0;
}
/**
 * Sets up this matrix after another matrix and writes its rows one by one.
 *
 * The dimensions are taken from `matrix`, the row lengths from `rowLengths`.
 * Two host-side buffers sized to the number of columns are then passed to
 * setRow() for every row.
 *
 * NOTE: the call that would read a row from `matrix` into the buffers is
 * still commented out (see the TODO below), so the buffers handed to
 * setRow() are not filled from the source matrix.
 *
 * @param matrix      matrix providing the dimensions
 * @param rowLengths  per-row lengths passed to setCompressedRowsLengths() and setRow()
 * @return false if setting the dimensions, the row lengths or a buffer size
 *         fails; true otherwise
 */
template< typename Real,
          typename Device,
          typename Index >
   template< typename Matrix >
bool tnlMatrix< Real, Device, Index >::copyFrom( const Matrix& matrix,
                                                 const CompressedRowsLengthsVector& rowLengths )
{
   /*tnlStaticAssert( DeviceType::DeviceType == Devices::HostDevice, );
   tnlStaticAssert( DeviceType::DeviceType == Matrix:DeviceType::DeviceType, );*/

   if( ! this->setLike( matrix ) ||
       ! this->setCompressedRowsLengths( rowLengths ) )
      return false;

   Vectors::Vector< RealType, Devices::Host, IndexType > values;
   Vectors::Vector< IndexType, Devices::Host, IndexType > columns;
   if( ! values.setSize( this->getColumns() ) ||
       ! columns.setSize( this->getColumns() ) )
      return false;

   for( IndexType row = 0; row < this->getRows(); row++ )
   {
      // TODO: fix this
      //matrix.getRow( row, columns.getData(), values.getData() );
      this->setRow( row, columns.getData(), values.getData(), rowLengths.getElement( row ) );
   }
   return true;
}
/**
 * Copy assignment: makes this matrix a row-by-row copy of `m`.
 *
 * The dimensions and the per-row lengths are taken from `m`. Each row is then
 * read into two temporary buffers, sized to the longest row, and written into
 * this matrix with setRow().
 *
 * @param m  matrix to copy from
 * @return reference to this matrix
 */
template< typename Real,
          typename Device,
          typename Index >
tnlMatrix< Real, Device, Index >& tnlMatrix< Real, Device, Index >::operator = ( const tnlMatrix< RealType, DeviceType, IndexType >& m )
{
   // Self-assignment: nothing to copy, and re-allocating the row lengths
   // below must not touch the data we would be reading from.
   if( this == &m )
      return *this;

   this->setLike( m );

   Vectors::Vector< IndexType, DeviceType, IndexType > rowLengths;
   m.getCompressedRowsLengths( rowLengths );
   this->setCompressedRowsLengths( rowLengths );

   // Buffers large enough for the longest row, reused for every row.
   Vectors::Vector< RealType, DeviceType, IndexType > rowValues;
   Vectors::Vector< IndexType, DeviceType, IndexType > rowColumns;
   const IndexType maxRowLength = rowLengths.max();
   rowValues.setSize( maxRowLength );
   rowColumns.setSize( maxRowLength );

   for( IndexType row = 0; row < this->getRows(); row++ )
   {
      m.getRow( row,
                rowColumns.getData(),
                rowValues.getData() );
      this->setRow( row,
                    rowColumns.getData(),
                    rowValues.getData(),
                    m.getRowLength( row ) );
   }
   return *this;
}
/**
 * Element-wise comparison with another matrix-like object.
 *
 * @param matrix  object providing getRows(), getColumns() and getElement()
 * @return false if the dimensions differ or if any pair of elements at the
 *         same position compares unequal; true otherwise
 */
template< typename Real, typename Device, typename Index >
   template< typename Matrix >
bool
tnlMatrix< Real, Device, Index >::operator == ( const Matrix& matrix ) const
{
   const bool sameShape = this->getRows() == matrix.getRows() &&
                          this->getColumns() == matrix.getColumns();
   if( ! sameShape )
      return false;

   // Every position of the full rows x columns range is compared.
   for( IndexType i = 0; i < this->getRows(); ++i )
   {
      for( IndexType j = 0; j < this->getColumns(); ++j )
      {
         if( this->getElement( i, j ) != matrix.getElement( i, j ) )
            return false;
      }
   }
   return true;
}
/**
 * Inequality comparison, defined as the negation of the member operator ==.
 *
 * @param matrix  object to compare with
 * @return true exactly when operator == ( matrix ) returns false
 */
template< typename Real, typename Device, typename Index >
   template< typename Matrix >
bool
tnlMatrix< Real, Device, Index >::operator != ( const Matrix& matrix ) const
{
   const bool equal = this->operator == ( matrix );
   return ! equal;
}
/**
 * Writes the matrix to a file.
 *
 * The Object part is saved first, followed by the row count, the column
 * count (one IndexType value each) and finally the `values` member.
 *
 * @param file  file to write to
 * @return false as soon as any of the writes fails, true otherwise
 */
template< typename Real,
          typename Device,
          typename Index >
bool tnlMatrix< Real, Device, Index >::save( File& file ) const
{
   if( ! Object::save( file ) ||
       ! file.write< IndexType, Devices::Host, Index >( &this->rows, 1 ) ||
       ! file.write< IndexType, Devices::Host, Index >( &this->columns, 1 ) ||
       ! this->values.save( file ) )
      return false;
   return true;
}
/**
 * Reads the matrix from a file.
 *
 * The Object part is loaded first, followed by the row count, the column
 * count (one IndexType value each) and finally the `values` member, i.e. in
 * the same order in which save() writes them.
 *
 * @param file  file to read from
 * @return false as soon as any of the reads fails, true otherwise
 */
template< typename Real,
          typename Device,
          typename Index >
bool tnlMatrix< Real, Device, Index >::load( File& file )
{
   if( ! Object::load( file ) ||
       ! file.read< IndexType, Devices::Host, Index >( &this->rows, 1 ) ||
       ! file.read< IndexType, Devices::Host, Index >( &this->columns, 1 ) ||
       ! this->values.load( file ) )
      return false;
   return true;
}
// Out-of-class definition header of tnlMatrix::print, a const member taking
// the output stream `str` by reference.
// NOTE(review): no function body is visible after this signature here -
// confirm against the complete file before relying on this definition.
template< typename Real,
typename Device,
typename Index >
void tnlMatrix< Real, Device, Index >::print( std::ostream& str ) const
#ifdef HAVE_CUDA
/**
 * CUDA kernel computing one entry of a matrix-vector product per thread.
 *
 * Each thread derives its row index from gridIdx, blockIdx.x, blockDim.x and
 * threadIdx.x. Threads whose index is not below the number of matrix rows do
 * nothing; the others store matrix->rowVectorProduct( rowIdx, *inVector )
 * into ( *outVector )[ rowIdx ].
 *
 * @param matrix     pointer to the matrix; Matrix::DeviceType must be Devices::Cuda
 * @param inVector   pointer to the input vector
 * @param outVector  pointer to the output vector
 * @param gridIdx    index of the grid being launched; it offsets the block
 *                   index by gridIdx * Devices::Cuda::getMaxGridSize()
 */
template< typename Matrix,
          typename InVector,
          typename OutVector >
__global__ void tnlMatrixVectorProductCudaKernel( const Matrix* matrix,
                                                  const InVector* inVector,
                                                  OutVector* outVector,
                                                  int gridIdx )
{
   static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" );
   const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   // The last block may contain threads beyond the final row.
   if( rowIdx < matrix->getRows() )
      ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
}
#endif
/**
 * Launches tnlMatrixVectorProductCudaKernel over all rows of a matrix.
 *
 * The matrix and both vectors are passed to the device with
 * Devices::Cuda::passToDevice() and released again with freeFromDevice()
 * after the launches. Rows are covered by blocks of 256 threads; because a
 * single grid holds at most Devices::Cuda::getMaxGridSize() blocks, the
 * blocks are split over several consecutive kernel launches.
 *
 * Without HAVE_CUDA the function body is empty.
 *
 * @param matrix     matrix operand
 * @param inVector   vector to multiply
 * @param outVector  vector receiving the result
 */
template< typename Matrix,
          typename InVector,
          typename OutVector >
void tnlMatrixVectorProductCuda( const Matrix& matrix,
                                 const InVector& inVector,
                                 OutVector& outVector )
{
#ifdef HAVE_CUDA
   typedef typename Matrix::IndexType IndexType;
   Matrix* kernel_this = Devices::Cuda::passToDevice( matrix );
   InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
   OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
   dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() );
   const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
   const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() );
   // Number of blocks left over for the last grid; zero means the last grid
   // is completely full.
   const IndexType lastGridBlocks = cudaBlocks % Devices::Cuda::getMaxGridSize();
   for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
   {
      // Shrink only a partially filled last grid. When the block count is an
      // exact multiple of the maximum grid size the remainder is zero, and a
      // zero-sized grid is not a valid launch configuration.
      if( gridIdx == cudaGrids - 1 && lastGridBlocks != 0 )
         cudaGridSize.x = lastGridBlocks;
      tnlMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                     ( kernel_this,
                                       kernel_inVector,
                                       kernel_outVector,
                                       gridIdx );
   }
   Devices::Cuda::freeFromDevice( kernel_this );
   Devices::Cuda::freeFromDevice( kernel_inVector );
   Devices::Cuda::freeFromDevice( kernel_outVector );
   checkCudaDevice;
#endif
}