/***************************************************************************
                          tnlMatrix_impl.h  -  description
                             -------------------
    begin                : Dec 18, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/
/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
#ifndef TNLMATRIX_IMPL_H_
#define TNLMATRIX_IMPL_H_
template< typename Real,
typename Device,
typename Index >
tnlMatrix< Real, Device, Index >::tnlMatrix()
: rows( 0 ),
columns( 0 )
{
}
template< typename Real,
typename Device,
typename Index >
bool tnlMatrix< Real, Device, Index >::setDimensions( const IndexType rows,
const IndexType columns )
{
tnlAssert( rows > 0 && columns > 0,
cerr << " rows = " << rows << " columns = " << columns );
this->rows = rows;
this->columns = columns;
return true;
}
template< typename Real,
typename Device,
typename Index >
void tnlMatrix< Real, Device, Index >::getRowLengths( tnlVector< IndexType, DeviceType, IndexType >& rowLengths ) const
{
rowLengths.setSize( this->getRows() );
for( IndexType row = 0; row < this->getRows(); row++ )
   rowLengths.setElement( row, this->getRowLength( row ) );
}
template< typename Real,
typename Device,
typename Index >
template< typename Real2,
typename Device2,
typename Index2 >
bool tnlMatrix< Real, Device, Index >::setLike( const tnlMatrix< Real2, Device2, Index2 >& matrix )
{
return setDimensions( matrix.getRows(), matrix.getColumns() );
}
template< typename Real,
typename Device,
typename Index >
#ifdef HAVE_CUDA
__device__ __host__
#endif
Index tnlMatrix< Real, Device, Index >::getRows() const
{
return this->rows;
}
template< typename Real,
typename Device,
typename Index >
#ifdef HAVE_CUDA
__device__ __host__
#endif
Index tnlMatrix< Real, Device, Index >::getColumns() const
{
return this->columns;
}
template< typename Real,
typename Device,
typename Index >
void tnlMatrix< Real, Device, Index >::reset()
{
this->rows = 0;
this->columns = 0;
}
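// Assignment: copies the dimensions and row lengths of m and then transfers
// each row through temporary buffers of column indices and values.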
template< typename Real,
typename Device,
typename Index >
tnlMatrix< Real, Device, Index >& tnlMatrix< Real, Device, Index >::operator = ( const tnlMatrix< RealType, DeviceType, IndexType >& m )
{
this->setLike( m );
tnlVector< IndexType, DeviceType, IndexType > rowLengths;
m.getRowLengths( rowLengths );
this->setRowLengths( rowLengths );
tnlVector< RealType, DeviceType, IndexType > rowValues;
tnlVector< IndexType, DeviceType, IndexType > rowColumns;
const IndexType maxRowLength = rowLengths.max();
rowValues.setSize( maxRowLength );
rowColumns.setSize( maxRowLength );
for( IndexType row = 0; row < this->getRows(); row++ )
{
      m.getRow( row,
                rowColumns.getData(),
                rowValues.getData() );
      this->setRow( row,
                    rowColumns.getData(),
                    rowValues.getData(),
                    rowLengths.getElement( row ) );
   }
   return *this;
}
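// Serialization: save/load write the tnlObject header, both dimensions and
// the values array via tnlFile; the HAVE_NOT_CXX11 branch keeps the
// explicitly parametrized write/read calls for compilers without C++11.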
template< typename Real,
typename Device,
typename Index >
bool tnlMatrix< Real, Device, Index >::save( tnlFile& file ) const
{
#ifdef HAVE_NOT_CXX11
   if( ! tnlObject::save( file ) ||
       ! file.write< IndexType, tnlHost, Index >( &this->rows, 1 ) ||
       ! file.write< IndexType, tnlHost, Index >( &this->columns, 1 ) ||
       ! this->values.save( file ) )
      return false;
#else
   if( ! tnlObject::save( file ) ||
       ! file.write( &this->rows ) ||
       ! file.write( &this->columns ) ||
       ! this->values.save( file ) )
      return false;
#endif
   return true;
}
template< typename Real,
typename Device,
typename Index >
bool tnlMatrix< Real, Device, Index >::load( tnlFile& file )
{
#ifdef HAVE_NOT_CXX11
   if( ! tnlObject::load( file ) ||
       ! file.read< IndexType, tnlHost, Index >( &this->rows, 1 ) ||
       ! file.read< IndexType, tnlHost, Index >( &this->columns, 1 ) ||
       ! this->values.load( file ) )
      return false;
#else
   if( ! tnlObject::load( file ) ||
       ! file.read( &this->rows ) ||
       ! file.read( &this->columns ) ||
       ! this->values.load( file ) )
      return false;
#endif
   return true;
}
template< typename Real,
typename Device,
typename Index >
void tnlMatrix< Real, Device, Index >::print( ostream& str ) const
{
}
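// CUDA matrix-vector product kernel: each thread computes one row of the
// result vector; gridIdx offsets the row index so that matrices with more
// rows than one CUDA grid can cover are processed by repeated launches.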
#ifdef HAVE_CUDA
template< typename Matrix,
typename Vector >
__global__ void tnlMatrixVectorProductCudaKernel( const Matrix* matrix,
const Vector* inVector,
Vector* outVector,
int gridIdx )
{
tnlStaticAssert( Matrix::DeviceType::DeviceType == tnlCudaDevice, );
const typename Matrix::IndexType rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
if( rowIdx < matrix->getRows() )
( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector );
}
#endif
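// Host-side launcher: the matrix and both vectors are passed to the device,
// the rows are split into grids of at most tnlCuda::getMaxGridSize() blocks
// and the kernel above is launched once per grid.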
template< typename Matrix,
typename Vector >
void tnlMatrixVectorProductCuda( const Matrix& matrix,
const Vector& inVector,
Vector& outVector )
{
#ifdef HAVE_CUDA
typedef typename Matrix::IndexType IndexType;
Matrix* kernel_this = tnlCuda::passToDevice( matrix );
Vector* kernel_inVector = tnlCuda::passToDevice( inVector );
Vector* kernel_outVector = tnlCuda::passToDevice( outVector );
dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
{
      if( gridIdx == cudaGrids - 1 )
      {
         // The last grid may be smaller; avoid a zero grid size when
         // cudaBlocks is an exact multiple of the maximum grid size.
         cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize();
         if( cudaGridSize.x == 0 )
            cudaGridSize.x = tnlCuda::getMaxGridSize();
      }
tnlMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
( kernel_this,
kernel_inVector,
kernel_outVector,
gridIdx );
}
tnlCuda::freeFromDevice( kernel_this );
tnlCuda::freeFromDevice( kernel_inVector );
tnlCuda::freeFromDevice( kernel_outVector );
checkCudaDevice;
#endif
}
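/*
 * Minimal usage sketch (illustration only; it assumes a concrete matrix type
 * derived from tnlMatrix, here hypothetically a CSR matrix on tnlCuda):
 *
 *    tnlCSRMatrix< double, tnlCuda, int > matrix;      // assumed CSR class
 *    tnlVector< double, tnlCuda, int > x, b;
 *    // ... set the matrix elements, fill x, call b.setSize( matrix.getRows() ) ...
 *    tnlMatrixVectorProductCuda( matrix, x, b );       // b = matrix * x
 */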
#endif /* TNLMATRIX_IMPL_H_ */