From 0862d913ed209534ea34e2168ab743f5a3a43280 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Sun, 29 Jun 2014 17:39:13 +0200 Subject: [PATCH] Implementing the linear solvers benchmark. --- src/core/vectors/tnlSharedVector.h | 16 ------ src/implementation/core/arrays/CMakeLists.txt | 1 + .../core/arrays/tnlSharedArray_impl.cu | 29 ++++++++++ .../core/arrays/tnlSharedArray_impl.h | 10 ++-- .../core/vectors/tnlSharedVector_impl.h | 18 +++++++ .../matrices/tnlChunkedEllpackMatrix_impl.h | 7 +-- .../matrices/tnlMatrixReader_impl.h | 53 ++++++++++++++++++- src/matrices/tnlCSRMatrix.h | 4 ++ src/matrices/tnlChunkedEllpackMatrix.h | 10 ++-- src/matrices/tnlDenseMatrix.h | 3 ++ src/matrices/tnlEllpackMatrix.h | 3 ++ src/matrices/tnlMatrixReader.h | 12 ++++- src/matrices/tnlMultidiagonalMatrix.h | 3 ++ src/matrices/tnlSlicedEllpackMatrix.h | 3 ++ src/matrices/tnlTridiagonalMatrix.h | 3 ++ src/solvers/linear/krylov/tnlGMRESSolver.h | 5 +- .../benchmarks/tnl-benchmark-linear-solvers.h | 16 +++++- 17 files changed, 164 insertions(+), 32 deletions(-) create mode 100644 src/implementation/core/arrays/tnlSharedArray_impl.cu diff --git a/src/core/vectors/tnlSharedVector.h b/src/core/vectors/tnlSharedVector.h index 828490c426..8dfe5575c2 100644 --- a/src/core/vectors/tnlSharedVector.h +++ b/src/core/vectors/tnlSharedVector.h @@ -143,20 +143,4 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index > #include <implementation/core/vectors/tnlSharedVector_impl.h> -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION - -extern template class tnlSharedVector< float, tnlHost, int >; -extern template class tnlSharedVector< double, tnlHost, int >; -extern template class tnlSharedVector< float, tnlHost, long int >; -extern template class tnlSharedVector< double, tnlHost, long int >; - -#ifdef HAVE_CUDA -extern template class tnlSharedVector< float, tnlCuda, int >; -extern template class tnlSharedVector< double, tnlCuda, int >; -extern template class tnlSharedVector< float, tnlCuda, long int >; -extern template class tnlSharedVector< double, tnlCuda, long int >; -#endif - -#endif - #endif /* TNLSHAREDVECTOR_H_ */ diff --git a/src/implementation/core/arrays/CMakeLists.txt b/src/implementation/core/arrays/CMakeLists.txt index 69b3be5c4f..90bb09f549 100755 --- a/src/implementation/core/arrays/CMakeLists.txt +++ b/src/implementation/core/arrays/CMakeLists.txt @@ -19,6 +19,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu ${CURRENT_DIR}/tnlArray_impl.cu + ${CURRENT_DIR}/tnlSharedArray_impl.cu ${CURRENT_DIR}/tnlMultiArray_impl.cu PARENT_SCOPE ) ELSE() diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.cu b/src/implementation/core/arrays/tnlSharedArray_impl.cu new file mode 100644 index 0000000000..51d2eae92b --- /dev/null +++ b/src/implementation/core/arrays/tnlSharedArray_impl.cu @@ -0,0 +1,29 @@ +/*************************************************************************** + tnlSharedArray_impl.cu - description + ------------------- + begin : Jan 20, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <core/arrays/tnlSharedArray.h> + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +#ifdef HAVE_CUDA +template class tnlSharedArray< float, tnlCuda, int >; +template class tnlSharedArray< double, tnlCuda, int >; +template class tnlSharedArray< float, tnlCuda, long int >; +template class tnlSharedArray< double, tnlCuda, long int >; +#endif + +#endif \ No newline at end of file diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.h b/src/implementation/core/arrays/tnlSharedArray_impl.h index 134b4c984c..1b3372ae8a 100644 --- a/src/implementation/core/arrays/tnlSharedArray_impl.h +++ b/src/implementation/core/arrays/tnlSharedArray_impl.h @@ -409,16 +409,18 @@ ostream& operator << ( ostream& str, const tnlSharedArray< Element, Device, Inde #ifdef TEMPLATE_EXPLICIT_INSTANTIATION -extern template class tnlSharedArray< float, tnlHost, int >; +// TODO: this does not work with CUDA 5.5 - fix it later + +/*extern template class tnlSharedArray< float, tnlHost, int >; extern template class tnlSharedArray< double, tnlHost, int >; extern template class tnlSharedArray< float, tnlHost, long int >; -extern template class tnlSharedArray< double, tnlHost, long int >; +extern template class tnlSharedArray< double, tnlHost, long int >;*/ #ifdef HAVE_CUDA -extern template class tnlSharedArray< float, tnlCuda, int >; +/*extern template class tnlSharedArray< float, tnlCuda, int >; extern template class tnlSharedArray< double, tnlCuda, int >; extern template class tnlSharedArray< float, tnlCuda, long int >; -extern template class tnlSharedArray< double, tnlCuda, long int >; +extern template class tnlSharedArray< double, tnlCuda, long int >;*/ #endif #endif diff --git a/src/implementation/core/vectors/tnlSharedVector_impl.h b/src/implementation/core/vectors/tnlSharedVector_impl.h index 152d03a3bf..c345157971 100644 --- a/src/implementation/core/vectors/tnlSharedVector_impl.h +++ b/src/implementation/core/vectors/tnlSharedVector_impl.h @@ -372,4 +372,22 @@ void tnlSharedVector< Real, Device, Index > :: computeExclusivePrefixSum( const tnlVectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); } + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +extern template class tnlSharedVector< float, tnlHost, int >; +extern template class tnlSharedVector< double, tnlHost, int >; +extern template class tnlSharedVector< float, tnlHost, long int >; +extern template class tnlSharedVector< double, tnlHost, long int >; + +#ifdef HAVE_CUDA +// TODO: fix this - it does not work with CUDA 5.5 +/*extern template class tnlSharedVector< float, tnlCuda, int >; +extern template class tnlSharedVector< double, tnlCuda, int >; +extern template class tnlSharedVector< float, tnlCuda, long int >; +extern template class tnlSharedVector< double, tnlCuda, long int >;*/ +#endif + +#endif + #endif /* TNLSHAREDVECTOR_H_IMPLEMENTATION */ diff --git a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h index 38742ae43d..9d97fe4d24 100644 --- a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +++ b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h @@ -1062,9 +1062,10 @@ typename Vector::RealType tnlChunkedEllpackMatrix< Real, Device, Index >::chunkV template< typename Real, typename Device, typename Index > - template< typename Vector > -__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const Vector* inVector, - Vector* outVector, + template< typename InVector, + typename OutVector > +__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const InVector* inVector, + OutVector* outVector, int sliceIdx ) const { tnlStaticAssert( DeviceType::DeviceType == tnlCudaDevice, ); diff --git a/src/implementation/matrices/tnlMatrixReader_impl.h b/src/implementation/matrices/tnlMatrixReader_impl.h index 994f8a0b25..c5ce0d7c6b 100644 --- a/src/implementation/matrices/tnlMatrixReader_impl.h +++ b/src/implementation/matrices/tnlMatrixReader_impl.h @@ -44,6 +44,15 @@ template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, bool verbose ) +{ + return tnlMatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose ); +} + +template< typename Matrix > +bool tnlMatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, + Matrix& matrix, + typename Matrix::RowLengthsVector& rowLengths, + bool verbose ) { IndexType rows, columns; bool symmetricMatrix( false ); @@ -51,7 +60,7 @@ bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) ) return false; - tnlVector< int, tnlHost, int > rowLengths; + if( ! matrix.setDimensions( rows, columns ) || ! rowLengths.setSize( rows ) ) { @@ -371,5 +380,47 @@ bool tnlMatrixReader< Matrix >::parseMtxLineWithElement( const tnlString& line, return true; } +template<> +class tnlMatrixReaderDeviceDependentCode< tnlHost > +{ + public: + + template< typename Matrix > + static bool readMtxFile( std::istream& file, + Matrix& matrix, + bool verbose ) + { + typename Matrix::RowLengthsVector rowLengths; + return tnlMatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose ); + } +}; + +template<> +class tnlMatrixReaderDeviceDependentCode< tnlCuda > +{ + public: + + template< typename Matrix > + static bool readMtxFile( std::istream& file, + Matrix& matrix, + bool verbose ) + { + typedef typename Matrix::HostType HostMatrixType; + typedef typename HostMatrixType::RowLengthsVector RowLengthsVector; + + HostMatrixType hostMatrix; + RowLengthsVector rowLengthsVector; + if( ! tnlMatrixReader< HostMatrixType >::readMtxFileHostMatrix( file, hostMatrix, rowLengthsVector, verbose ) ) + return false; + + typename Matrix::RowLengthsVector cudaRowLengthsVector; + cudaRowLengthsVector.setLike( rowLengthsVector ); + cudaRowLengthsVector = rowLengthsVector; + if( ! matrix.copyFrom( hostMatrix, cudaRowLengthsVector ) ) + return false; + return true; + } +}; + #endif /* TNLMATRIXREADER_IMPL_H_ */ diff --git a/src/matrices/tnlCSRMatrix.h b/src/matrices/tnlCSRMatrix.h index d3d67c91c2..efdf540266 100644 --- a/src/matrices/tnlCSRMatrix.h +++ b/src/matrices/tnlCSRMatrix.h @@ -33,6 +33,10 @@ class tnlCSRMatrix : public tnlSparseMatrix< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector; + typedef tnlCSRMatrix< Real, Device, Index > ThisType; + typedef tnlCSRMatrix< Real, tnlHost, Index > HostType; + typedef tnlCSRMatrix< Real, tnlCuda, Index > CudaType; + enum SPMVCudaKernel { scalar, vector, hybrid }; diff --git a/src/matrices/tnlChunkedEllpackMatrix.h b/src/matrices/tnlChunkedEllpackMatrix.h index 87cace2d49..1ee65c1ef7 100644 --- a/src/matrices/tnlChunkedEllpackMatrix.h +++ b/src/matrices/tnlChunkedEllpackMatrix.h @@ -62,6 +62,9 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef Index IndexType; typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector; + typedef tnlChunkedEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlChunkedEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlChunkedEllpackMatrix< Real, tnlCuda, Index > CudaType; tnlChunkedEllpackMatrix(); @@ -184,9 +187,10 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > const Vector& vector ) const; #ifdef HAVE_CUDA - template< typename Vector > - __device__ void computeSliceVectorProduct( const Vector* inVector, - Vector* outVector, + template< typename InVector, + typename OutVector > + __device__ void computeSliceVectorProduct( const InVector* inVector, + OutVector* outVector, int gridIdx ) const; #endif diff --git a/src/matrices/tnlDenseMatrix.h b/src/matrices/tnlDenseMatrix.h index d991c5aefa..4b3989803f 100644 --- a/src/matrices/tnlDenseMatrix.h +++ b/src/matrices/tnlDenseMatrix.h @@ -37,6 +37,9 @@ class tnlDenseMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlDenseMatrix< Real, Device, Index > ThisType; + typedef tnlDenseMatrix< Real, tnlHost, Index > HostType; + typedef tnlDenseMatrix< Real, tnlCuda, Index > CudaType; + tnlDenseMatrix(); diff --git a/src/matrices/tnlEllpackMatrix.h b/src/matrices/tnlEllpackMatrix.h index be90ae6f71..e56b1922ff 100644 --- a/src/matrices/tnlEllpackMatrix.h +++ b/src/matrices/tnlEllpackMatrix.h @@ -36,6 +36,9 @@ class tnlEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef tnlEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlEllpackMatrix< Real, tnlCuda, Index > CudaType; + tnlEllpackMatrix(); diff --git a/src/matrices/tnlMatrixReader.h b/src/matrices/tnlMatrixReader.h index 011def3468..a2ba34efea 100644 --- a/src/matrices/tnlMatrixReader.h +++ b/src/matrices/tnlMatrixReader.h @@ -22,6 +22,10 @@ #include <core/tnlString.h> #include <core/vectors/tnlVector.h> +template< typename Device > +class tnlMatrixReaderDeviceDependentCode +{}; + template< typename Matrix > class tnlMatrixReader { @@ -38,6 +42,12 @@ class tnlMatrixReader Matrix& matrix, bool verbose = false ); + static bool readMtxFileHostMatrix( std::istream& file, + Matrix& matrix, + typename Matrix::RowLengthsVector& rowLengths, + bool verbose ); + + static bool verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose = false ); @@ -74,10 +84,10 @@ class tnlMatrixReader IndexType& row, IndexType& column, RealType& value ); - }; + #include <implementation/matrices/tnlMatrixReader_impl.h> #endif /* TNLMATRIXREADER_H_ */ diff --git a/src/matrices/tnlMultidiagonalMatrix.h b/src/matrices/tnlMultidiagonalMatrix.h index d0ecf615c4..702b448100 100644 --- a/src/matrices/tnlMultidiagonalMatrix.h +++ b/src/matrices/tnlMultidiagonalMatrix.h @@ -34,6 +34,9 @@ class tnlMultidiagonalMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlMultidiagonalMatrix< Real, Device, Index > ThisType; + typedef tnlMultidiagonalMatrix< Real, tnlHost, Index > HostType; + typedef tnlMultidiagonalMatrix< Real, tnlCuda, Index > CudaType; + tnlMultidiagonalMatrix(); diff --git a/src/matrices/tnlSlicedEllpackMatrix.h b/src/matrices/tnlSlicedEllpackMatrix.h index 2530868d27..6184473d8e 100644 --- a/src/matrices/tnlSlicedEllpackMatrix.h +++ b/src/matrices/tnlSlicedEllpackMatrix.h @@ -54,6 +54,9 @@ class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef tnlSlicedEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlSlicedEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlSlicedEllpackMatrix< Real, tnlCuda, Index > CudaType; + tnlSlicedEllpackMatrix(); diff --git a/src/matrices/tnlTridiagonalMatrix.h b/src/matrices/tnlTridiagonalMatrix.h index 7bcafd79e0..b7f77ba2fd 100644 --- a/src/matrices/tnlTridiagonalMatrix.h +++ b/src/matrices/tnlTridiagonalMatrix.h @@ -36,6 +36,9 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlTridiagonalMatrix< Real, Device, Index > ThisType; + typedef tnlTridiagonalMatrix< Real, tnlHost, Index > HostType; + typedef tnlTridiagonalMatrix< Real, tnlCuda, Index > CudaType; + tnlTridiagonalMatrix(); diff --git a/src/solvers/linear/krylov/tnlGMRESSolver.h b/src/solvers/linear/krylov/tnlGMRESSolver.h index 0fbf069db5..4ccee98ee4 100644 --- a/src/solvers/linear/krylov/tnlGMRESSolver.h +++ b/src/solvers/linear/krylov/tnlGMRESSolver.h @@ -122,10 +122,11 @@ extern template class tnlGMRESSolver< tnlMultiDiagonalMatrix< double, tnlHost, l #ifdef HAVE_CUDA -extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, int > >; +// TODO: fix this - does not work with CUDA 5.5 +/*extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, long int > >; -extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >; +extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >;*/ /*extern template class tnlGMRESSolver< tnlEllpackMatrix< float, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlEllpackMatrix< double, tnlCuda, int > >; diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.h b/tests/benchmarks/tnl-benchmark-linear-solvers.h index 82f506dee8..1d5fa65232 100644 --- a/tests/benchmarks/tnl-benchmark-linear-solvers.h +++ b/tests/benchmarks/tnl-benchmark-linear-solvers.h @@ -34,6 +34,9 @@ #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlMatrixReader.h> #include <solvers/linear/krylov/tnlGMRESSolver.h> +#include <solvers/linear/krylov/tnlCGSolver.h> +#include <solvers/linear/krylov/tnlBICGStabSolver.h> +#include <solvers/linear/krylov/tnlTFQMRSolver.h> #include <solvers/linear/tnlLinearResidueGetter.h> #include <solvers/tnlIterativeSolverMonitor.h> @@ -119,6 +122,15 @@ bool resolveLinearSolver( const tnlParameterContainer& parameters ) if( solver == "gmres" ) return benchmarkSolver< tnlGMRESSolver< Matrix > >( parameters, matrix ); + if( solver == "cg" ) + return benchmarkSolver< tnlCGSolver< Matrix > >( parameters, matrix ); + + if( solver == "bicgstab" ) + return benchmarkSolver< tnlBICGStabSolver< Matrix > >( parameters, matrix ); + + if( solver == "tfqmr" ) + return benchmarkSolver< tnlTFQMRSolver< Matrix > >( parameters, matrix ); + cerr << "Unknown solver " << solver << "." << endl; return false; } @@ -162,8 +174,8 @@ bool resolveDevice( const tnlParameterContainer& parameters ) if( device == "host" ) return resolveMatrixFormat< Real, tnlHost >( parameters ); - //if( device == "cuda" ) - // return resolveMatrixFormat< Real, tnlCuda >( parameters ); + if( device == "cuda" ) + return resolveMatrixFormat< Real, tnlCuda >( parameters ); cerr << "Uknown device " << device << "." << endl; return false; -- GitLab