diff --git a/src/core/vectors/tnlSharedVector.h b/src/core/vectors/tnlSharedVector.h index 828490c426dfbe7459439927c73b6d8b2518afa6..8dfe5575c2c38dcf02cedae88311a300d80af270 100644 --- a/src/core/vectors/tnlSharedVector.h +++ b/src/core/vectors/tnlSharedVector.h @@ -143,20 +143,4 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index > #include <implementation/core/vectors/tnlSharedVector_impl.h> -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION - -extern template class tnlSharedVector< float, tnlHost, int >; -extern template class tnlSharedVector< double, tnlHost, int >; -extern template class tnlSharedVector< float, tnlHost, long int >; -extern template class tnlSharedVector< double, tnlHost, long int >; - -#ifdef HAVE_CUDA -extern template class tnlSharedVector< float, tnlCuda, int >; -extern template class tnlSharedVector< double, tnlCuda, int >; -extern template class tnlSharedVector< float, tnlCuda, long int >; -extern template class tnlSharedVector< double, tnlCuda, long int >; -#endif - -#endif - #endif /* TNLSHAREDVECTOR_H_ */ diff --git a/src/implementation/core/arrays/CMakeLists.txt b/src/implementation/core/arrays/CMakeLists.txt index 69b3be5c4f469c5b45965e9eebed8bfacbbcccd0..90bb09f5495c8c3b0c24701df093c2814e3d4fd9 100755 --- a/src/implementation/core/arrays/CMakeLists.txt +++ b/src/implementation/core/arrays/CMakeLists.txt @@ -19,6 +19,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu ${CURRENT_DIR}/tnlArray_impl.cu + ${CURRENT_DIR}/tnlSharedArray_impl.cu ${CURRENT_DIR}/tnlMultiArray_impl.cu PARENT_SCOPE ) ELSE() diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.cu b/src/implementation/core/arrays/tnlSharedArray_impl.cu new file mode 100644 index 0000000000000000000000000000000000000000..51d2eae92b17f2e597ba3754df9264389ad7c634 --- /dev/null +++ b/src/implementation/core/arrays/tnlSharedArray_impl.cu @@ -0,0 +1,29 @@ +/*************************************************************************** + tnlSharedArray_impl.cu - description + ------------------- + begin : Jan 20, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <core/arrays/tnlSharedArray.h> + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +#ifdef HAVE_CUDA +template class tnlSharedArray< float, tnlCuda, int >; +template class tnlSharedArray< double, tnlCuda, int >; +template class tnlSharedArray< float, tnlCuda, long int >; +template class tnlSharedArray< double, tnlCuda, long int >; +#endif + +#endif \ No newline at end of file diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.h b/src/implementation/core/arrays/tnlSharedArray_impl.h index 134b4c984c7129da9965d8adfbadc2a0f0d666a8..1b3372ae8a89ffd7755bb9b47a435f5b8024dbed 100644 --- a/src/implementation/core/arrays/tnlSharedArray_impl.h +++ b/src/implementation/core/arrays/tnlSharedArray_impl.h @@ -409,16 +409,18 @@ ostream& operator << ( ostream& str, const tnlSharedArray< Element, Device, Inde #ifdef TEMPLATE_EXPLICIT_INSTANTIATION -extern template class tnlSharedArray< float, tnlHost, int >; +// TODO: this does not work with CUDA 5.5 - fix it later + +/*extern template class tnlSharedArray< float, tnlHost, int >; extern template class tnlSharedArray< double, tnlHost, int >; extern template class tnlSharedArray< float, tnlHost, long int >; -extern template class tnlSharedArray< double, tnlHost, long int >; +extern template class tnlSharedArray< double, tnlHost, long int >;*/ #ifdef HAVE_CUDA -extern template class tnlSharedArray< float, tnlCuda, int >; +/*extern template class tnlSharedArray< float, tnlCuda, int >; extern template class tnlSharedArray< double, tnlCuda, int >; extern template class tnlSharedArray< float, tnlCuda, long int >; -extern template class tnlSharedArray< double, tnlCuda, long int >; +extern template class tnlSharedArray< double, tnlCuda, long int >;*/ #endif #endif diff --git a/src/implementation/core/vectors/tnlSharedVector_impl.h b/src/implementation/core/vectors/tnlSharedVector_impl.h index 152d03a3bfe480915db20b51a85901e93d6489a3..c345157971eb12b4d24c1971c510319fe307d40e 100644 --- a/src/implementation/core/vectors/tnlSharedVector_impl.h +++ b/src/implementation/core/vectors/tnlSharedVector_impl.h @@ -372,4 +372,22 @@ void tnlSharedVector< Real, Device, Index > :: computeExclusivePrefixSum( const tnlVectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end ); } + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +extern template class tnlSharedVector< float, tnlHost, int >; +extern template class tnlSharedVector< double, tnlHost, int >; +extern template class tnlSharedVector< float, tnlHost, long int >; +extern template class tnlSharedVector< double, tnlHost, long int >; + +#ifdef HAVE_CUDA +// TODO: fix this - it does not work with CUDA 5.5 +/*extern template class tnlSharedVector< float, tnlCuda, int >; +extern template class tnlSharedVector< double, tnlCuda, int >; +extern template class tnlSharedVector< float, tnlCuda, long int >; +extern template class tnlSharedVector< double, tnlCuda, long int >;*/ +#endif + +#endif + #endif /* TNLSHAREDVECTOR_H_IMPLEMENTATION */ diff --git a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h index 38742ae43d526ee8bb02946577dfe60fa54eb514..9d97fe4d2401d33a35aae74fa075e8f76e28a89f 100644 --- a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +++ b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h @@ -1062,9 +1062,10 @@ typename Vector::RealType tnlChunkedEllpackMatrix< Real, Device, Index >::chunkV template< typename Real, typename Device, typename Index > - template< typename Vector > -__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const Vector* inVector, - Vector* outVector, + template< typename InVector, + typename OutVector > +__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const InVector* inVector, + OutVector* outVector, int sliceIdx ) const { tnlStaticAssert( DeviceType::DeviceType == tnlCudaDevice, ); diff --git a/src/implementation/matrices/tnlMatrixReader_impl.h b/src/implementation/matrices/tnlMatrixReader_impl.h index 994f8a0b25f8f506b14bb49e4ef787a6b5c91486..c5ce0d7c6b3cb43884e77e7fa05306fb74ad1937 100644 --- a/src/implementation/matrices/tnlMatrixReader_impl.h +++ b/src/implementation/matrices/tnlMatrixReader_impl.h @@ -44,6 +44,15 @@ template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, bool verbose ) +{ + return tnlMatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose ); +} + +template< typename Matrix > +bool tnlMatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, + Matrix& matrix, + typename Matrix::RowLengthsVector& rowLengths, + bool verbose ) { IndexType rows, columns; bool symmetricMatrix( false ); @@ -51,7 +60,7 @@ bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) ) return false; - tnlVector< int, tnlHost, int > rowLengths; + if( ! matrix.setDimensions( rows, columns ) || ! rowLengths.setSize( rows ) ) { @@ -371,5 +380,47 @@ bool tnlMatrixReader< Matrix >::parseMtxLineWithElement( const tnlString& line, return true; } +template<> +class tnlMatrixReaderDeviceDependentCode< tnlHost > +{ + public: + + template< typename Matrix > + static bool readMtxFile( std::istream& file, + Matrix& matrix, + bool verbose ) + { + typename Matrix::RowLengthsVector rowLengths; + return tnlMatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose ); + } +}; + +template<> +class tnlMatrixReaderDeviceDependentCode< tnlCuda > +{ + public: + + template< typename Matrix > + static bool readMtxFile( std::istream& file, + Matrix& matrix, + bool verbose ) + { + typedef typename Matrix::HostType HostMatrixType; + typedef typename HostMatrixType::RowLengthsVector RowLengthsVector; + + HostMatrixType hostMatrix; + RowLengthsVector rowLengthsVector; + if( ! tnlMatrixReader< HostMatrixType >::readMtxFileHostMatrix( file, hostMatrix, rowLengthsVector, verbose ) ) + return false; + + typename Matrix::RowLengthsVector cudaRowLengthsVector; + cudaRowLengthsVector.setLike( rowLengthsVector ); + cudaRowLengthsVector = rowLengthsVector; + if( ! matrix.copyFrom( hostMatrix, cudaRowLengthsVector ) ) + return false; + return true; + } +}; + #endif /* TNLMATRIXREADER_IMPL_H_ */ diff --git a/src/matrices/tnlCSRMatrix.h b/src/matrices/tnlCSRMatrix.h index d3d67c91c2f39388db0601ed5184027469d4c3a7..efdf54026601e629f381ed9a8cf84797eb368793 100644 --- a/src/matrices/tnlCSRMatrix.h +++ b/src/matrices/tnlCSRMatrix.h @@ -33,6 +33,10 @@ class tnlCSRMatrix : public tnlSparseMatrix< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector; + typedef tnlCSRMatrix< Real, Device, Index > ThisType; + typedef tnlCSRMatrix< Real, tnlHost, Index > HostType; + typedef tnlCSRMatrix< Real, tnlCuda, Index > CudaType; + enum SPMVCudaKernel { scalar, vector, hybrid }; diff --git a/src/matrices/tnlChunkedEllpackMatrix.h b/src/matrices/tnlChunkedEllpackMatrix.h index 87cace2d49db4ca9b1ed9a7eff1ff6de2b624d21..1ee65c1ef76d7725a2b2f4f58f08e56b9ce39dcc 100644 --- a/src/matrices/tnlChunkedEllpackMatrix.h +++ b/src/matrices/tnlChunkedEllpackMatrix.h @@ -62,6 +62,9 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef Index IndexType; typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector; + typedef tnlChunkedEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlChunkedEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlChunkedEllpackMatrix< Real, tnlCuda, Index > CudaType; tnlChunkedEllpackMatrix(); @@ -184,9 +187,10 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > const Vector& vector ) const; #ifdef HAVE_CUDA - template< typename Vector > - __device__ void computeSliceVectorProduct( const Vector* inVector, - Vector* outVector, + template< typename InVector, + typename OutVector > + __device__ void computeSliceVectorProduct( const InVector* inVector, + OutVector* outVector, int gridIdx ) const; #endif diff --git a/src/matrices/tnlDenseMatrix.h b/src/matrices/tnlDenseMatrix.h index d991c5aefa3644ea1b8d3324ec3f42833afe8120..4b3989803fa9ecaaa94a01e28136caafb0fa3968 100644 --- a/src/matrices/tnlDenseMatrix.h +++ b/src/matrices/tnlDenseMatrix.h @@ -37,6 +37,9 @@ class tnlDenseMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlDenseMatrix< Real, Device, Index > ThisType; + typedef tnlDenseMatrix< Real, tnlHost, Index > HostType; + typedef tnlDenseMatrix< Real, tnlCuda, Index > CudaType; + tnlDenseMatrix(); diff --git a/src/matrices/tnlEllpackMatrix.h b/src/matrices/tnlEllpackMatrix.h index be90ae6f711e6f041067dfc45c4980a6dc420bec..e56b1922ff21e0ab8ac0032615b82d95001081cb 100644 --- a/src/matrices/tnlEllpackMatrix.h +++ b/src/matrices/tnlEllpackMatrix.h @@ -36,6 +36,9 @@ class tnlEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef tnlEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlEllpackMatrix< Real, tnlCuda, Index > CudaType; + tnlEllpackMatrix(); diff --git a/src/matrices/tnlMatrixReader.h b/src/matrices/tnlMatrixReader.h index 011def34683b5e1c369556e90424267d7403c150..a2ba34efea5f7fee523ae16226ea931d5c0f0268 100644 --- a/src/matrices/tnlMatrixReader.h +++ b/src/matrices/tnlMatrixReader.h @@ -22,6 +22,10 @@ #include <core/tnlString.h> #include <core/vectors/tnlVector.h> +template< typename Device > +class tnlMatrixReaderDeviceDependentCode +{}; + template< typename Matrix > class tnlMatrixReader { @@ -38,6 +42,12 @@ class tnlMatrixReader Matrix& matrix, bool verbose = false ); + static bool readMtxFileHostMatrix( std::istream& file, + Matrix& matrix, + typename Matrix::RowLengthsVector& rowLengths, + bool verbose ); + + static bool verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose = false ); @@ -74,10 +84,10 @@ class tnlMatrixReader IndexType& row, IndexType& column, RealType& value ); - }; + #include <implementation/matrices/tnlMatrixReader_impl.h> #endif /* TNLMATRIXREADER_H_ */ diff --git a/src/matrices/tnlMultidiagonalMatrix.h b/src/matrices/tnlMultidiagonalMatrix.h index d0ecf615c4d9f6c15a506250e031e90804376501..702b4481002262773ade66a5bf32a662dd13ee5e 100644 --- a/src/matrices/tnlMultidiagonalMatrix.h +++ b/src/matrices/tnlMultidiagonalMatrix.h @@ -34,6 +34,9 @@ class tnlMultidiagonalMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlMultidiagonalMatrix< Real, Device, Index > ThisType; + typedef tnlMultidiagonalMatrix< Real, tnlHost, Index > HostType; + typedef tnlMultidiagonalMatrix< Real, tnlCuda, Index > CudaType; + tnlMultidiagonalMatrix(); diff --git a/src/matrices/tnlSlicedEllpackMatrix.h b/src/matrices/tnlSlicedEllpackMatrix.h index 2530868d274a8c944a06095ff24fb293f0bb2f4f..6184473d8e386787dc4af60e9492cf6f14e59ac8 100644 --- a/src/matrices/tnlSlicedEllpackMatrix.h +++ b/src/matrices/tnlSlicedEllpackMatrix.h @@ -54,6 +54,9 @@ class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef tnlSlicedEllpackMatrix< Real, Device, Index > ThisType; + typedef tnlSlicedEllpackMatrix< Real, tnlHost, Index > HostType; + typedef tnlSlicedEllpackMatrix< Real, tnlCuda, Index > CudaType; + tnlSlicedEllpackMatrix(); diff --git a/src/matrices/tnlTridiagonalMatrix.h b/src/matrices/tnlTridiagonalMatrix.h index 7bcafd79e0ae21684b87a1b3789a0ca5a492d58d..b7f77ba2fdd9dfd802ff66ac61c85cb4b7b6c729 100644 --- a/src/matrices/tnlTridiagonalMatrix.h +++ b/src/matrices/tnlTridiagonalMatrix.h @@ -36,6 +36,9 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; typedef tnlTridiagonalMatrix< Real, Device, Index > ThisType; + typedef tnlTridiagonalMatrix< Real, tnlHost, Index > HostType; + typedef tnlTridiagonalMatrix< Real, tnlCuda, Index > CudaType; + tnlTridiagonalMatrix(); diff --git a/src/solvers/linear/krylov/tnlGMRESSolver.h b/src/solvers/linear/krylov/tnlGMRESSolver.h index 0fbf069db5f47202a1dc5c8369722dc6a5c2cf5c..4ccee98ee4981307b96e099461611323f3b7c2a8 100644 --- a/src/solvers/linear/krylov/tnlGMRESSolver.h +++ b/src/solvers/linear/krylov/tnlGMRESSolver.h @@ -122,10 +122,11 @@ extern template class tnlGMRESSolver< tnlMultiDiagonalMatrix< double, tnlHost, l #ifdef HAVE_CUDA -extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, int > >; +// TODO: fix this - does not work with CUDA 5.5 +/*extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlCSRMatrix< float, tnlCuda, long int > >; -extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >; +extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >;*/ /*extern template class tnlGMRESSolver< tnlEllpackMatrix< float, tnlCuda, int > >; extern template class tnlGMRESSolver< tnlEllpackMatrix< double, tnlCuda, int > >; diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.h b/tests/benchmarks/tnl-benchmark-linear-solvers.h index 82f506dee8bf94c1b63074f9855136f146f1b8bb..1d5fa65232cb6b1f7ce24d7392796730111dc7a2 100644 --- a/tests/benchmarks/tnl-benchmark-linear-solvers.h +++ b/tests/benchmarks/tnl-benchmark-linear-solvers.h @@ -34,6 +34,9 @@ #include <matrices/tnlChunkedEllpackMatrix.h> #include <matrices/tnlMatrixReader.h> #include <solvers/linear/krylov/tnlGMRESSolver.h> +#include <solvers/linear/krylov/tnlCGSolver.h> +#include <solvers/linear/krylov/tnlBICGStabSolver.h> +#include <solvers/linear/krylov/tnlTFQMRSolver.h> #include <solvers/linear/tnlLinearResidueGetter.h> #include <solvers/tnlIterativeSolverMonitor.h> @@ -119,6 +122,15 @@ bool resolveLinearSolver( const tnlParameterContainer& parameters ) if( solver == "gmres" ) return benchmarkSolver< tnlGMRESSolver< Matrix > >( parameters, matrix ); + if( solver == "cg" ) + return benchmarkSolver< tnlCGSolver< Matrix > >( parameters, matrix ); + + if( solver == "bicgstab" ) + return benchmarkSolver< tnlBICGStabSolver< Matrix > >( parameters, matrix ); + + if( solver == "tfqmr" ) + return benchmarkSolver< tnlTFQMRSolver< Matrix > >( parameters, matrix ); + cerr << "Unknown solver " << solver << "." << endl; return false; } @@ -162,8 +174,8 @@ bool resolveDevice( const tnlParameterContainer& parameters ) if( device == "host" ) return resolveMatrixFormat< Real, tnlHost >( parameters ); - //if( device == "cuda" ) - // return resolveMatrixFormat< Real, tnlCuda >( parameters ); + if( device == "cuda" ) + return resolveMatrixFormat< Real, tnlCuda >( parameters ); cerr << "Uknown device " << device << "." << endl; return false;