Skip to content
Snippets Groups Projects
Commit bc9a2b30 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Refactoring diagonal preconditioner

parent 663f3b2d
No related branches found
No related tags found
1 merge request!8Distributed linear solvers
This commit is part of merge request !8. Comments created here will be created in the context of that merge request.
...@@ -36,7 +36,7 @@ public: ...@@ -36,7 +36,7 @@ public:
virtual void update( const MatrixPointer& matrixPointer ) override; virtual void update( const MatrixPointer& matrixPointer ) override;
virtual bool solve( ConstVectorViewType b, VectorViewType x ) const override; virtual void solve( ConstVectorViewType b, VectorViewType x ) const override;
String getType() const String getType() const
{ {
......
...@@ -14,107 +14,47 @@ ...@@ -14,107 +14,47 @@
#include "Diagonal.h" #include "Diagonal.h"
#include <TNL/ParallelFor.h>
namespace TNL { namespace TNL {
namespace Solvers { namespace Solvers {
namespace Linear { namespace Linear {
namespace Preconditioners { namespace Preconditioners {
#ifdef HAVE_CUDA
template< typename Real, typename Index, typename Matrix >
__global__ void matrixDiagonalToVectorKernel( const Matrix* matrix,
                                              Real* diagonal,
                                              Index size )
{
   // Copy the main diagonal of the matrix into the `diagonal` array.
   // Grid-stride loop: each thread starts at its global index and advances
   // by the total number of launched threads (blockDim.x * gridDim.x).
   const Index gridStride = blockDim.x * gridDim.x;
   for( Index i = blockDim.x * blockIdx.x + threadIdx.x; i < size; i += gridStride )
      diagonal[ i ] = matrix->getElementFast( i, i );
}
template< typename Real, typename Index >
__global__ void elementwiseVectorDivisionKernel( const Real* left,
                                                 const Real* right,
                                                 Real* result,
                                                 Index size )
{
   // Elementwise division result[i] = left[i] / right[i], using a
   // grid-stride loop so any grid size covers the whole range [0, size).
   const Index gridStride = blockDim.x * gridDim.x;
   for( Index i = blockDim.x * blockIdx.x + threadIdx.x; i < size; i += gridStride )
      result[ i ] = left[ i ] / right[ i ];
}
#endif
template< typename Matrix > template< typename Matrix >
void void
Diagonal< Matrix >:: Diagonal< Matrix >::
update( const MatrixPointer& matrixPointer ) update( const MatrixPointer& matrixPointer )
{ {
// std::cout << getType() << "->setMatrix()" << std::endl;
TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" ); TNL_ASSERT_GT( matrixPointer->getRows(), 0, "empty matrix" );
TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" ); TNL_ASSERT_EQ( matrixPointer->getRows(), matrixPointer->getColumns(), "matrix must be square" );
if( diagonal.getSize() != matrixPointer->getRows() ) diagonal.setSize( matrixPointer->getRows() );
diagonal.setSize( matrixPointer->getRows() );
if( std::is_same< DeviceType, Devices::Host >::value ) VectorViewType diag_view( diagonal );
{ const Matrix* kernel_matrix = &matrixPointer.template getData< DeviceType >();
for( int i = 0; i < diagonal.getSize(); i++ ) {
diagonal[ i ] = matrixPointer->getElement( i, i ); auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
}
}
if( std::is_same< DeviceType, Devices::Cuda >::value )
{ {
#ifdef HAVE_CUDA diag_view[ i ] = kernel_matrix->getElementFast( i, i );
const IndexType& size = diagonal.getSize(); };
dim3 cudaBlockSize( 256 );
dim3 cudaBlocks;
cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
Devices::Cuda::synchronizeDevice(); ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
matrixDiagonalToVectorKernel<<< cudaBlocks, cudaBlockSize >>>(
&matrixPointer.template getData< Devices::Cuda >(),
diagonal.getData(),
size );
TNL_CHECK_CUDA_DEVICE;
#endif
}
} }
template< typename Matrix > template< typename Matrix >
bool void
Diagonal< Matrix >:: Diagonal< Matrix >::
solve( ConstVectorViewType b, VectorViewType x ) const solve( ConstVectorViewType b, VectorViewType x ) const
{ {
if( std::is_same< DeviceType, Devices::Host >::value ) ConstVectorViewType diag_view( diagonal );
{
for( int i = 0; i < diagonal.getSize(); i++ ) {
x[ i ] = b[ i ] / diagonal[ i ];
}
}
if( std::is_same< DeviceType, Devices::Cuda >::value )
{
#ifdef HAVE_CUDA
const IndexType& size = diagonal.getSize();
dim3 cudaBlockSize( 256 );
dim3 cudaBlocks;
cudaBlocks.x = min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ) );
elementwiseVectorDivisionKernel<<< cudaBlocks, cudaBlockSize >>>( auto kernel = [=] __cuda_callable__ ( IndexType i ) mutable
b.getData(), {
diagonal.getData(), x[ i ] = b[ i ] / diag_view[ i ];
x.getData(), };
size );
TNL_CHECK_CUDA_DEVICE; ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel );
#endif
}
return true;
} }
} // namespace Preconditioners } // namespace Preconditioners
......
Loading… (or an error occurred while loading).
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.