From 0862d913ed209534ea34e2168ab743f5a3a43280 Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Sun, 29 Jun 2014 17:39:13 +0200
Subject: [PATCH] Implementing the linear solvers benchmark.

---
 src/core/vectors/tnlSharedVector.h            | 16 ------
 src/implementation/core/arrays/CMakeLists.txt |  1 +
 .../core/arrays/tnlSharedArray_impl.cu        | 29 ++++++++++
 .../core/arrays/tnlSharedArray_impl.h         | 10 ++--
 .../core/vectors/tnlSharedVector_impl.h       | 18 +++++++
 .../matrices/tnlChunkedEllpackMatrix_impl.h   |  7 +--
 .../matrices/tnlMatrixReader_impl.h           | 53 ++++++++++++++++++-
 src/matrices/tnlCSRMatrix.h                   |  4 ++
 src/matrices/tnlChunkedEllpackMatrix.h        | 10 ++--
 src/matrices/tnlDenseMatrix.h                 |  3 ++
 src/matrices/tnlEllpackMatrix.h               |  3 ++
 src/matrices/tnlMatrixReader.h                | 12 ++++-
 src/matrices/tnlMultidiagonalMatrix.h         |  3 ++
 src/matrices/tnlSlicedEllpackMatrix.h         |  3 ++
 src/matrices/tnlTridiagonalMatrix.h           |  3 ++
 src/solvers/linear/krylov/tnlGMRESSolver.h    |  5 +-
 .../benchmarks/tnl-benchmark-linear-solvers.h | 16 +++++-
 17 files changed, 164 insertions(+), 32 deletions(-)
 create mode 100644 src/implementation/core/arrays/tnlSharedArray_impl.cu

diff --git a/src/core/vectors/tnlSharedVector.h b/src/core/vectors/tnlSharedVector.h
index 828490c426..8dfe5575c2 100644
--- a/src/core/vectors/tnlSharedVector.h
+++ b/src/core/vectors/tnlSharedVector.h
@@ -143,20 +143,4 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index >
 
 #include <implementation/core/vectors/tnlSharedVector_impl.h>
 
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-extern template class tnlSharedVector< float, tnlHost, int >;
-extern template class tnlSharedVector< double, tnlHost, int >;
-extern template class tnlSharedVector< float, tnlHost, long int >;
-extern template class tnlSharedVector< double, tnlHost, long int >;
-
-#ifdef HAVE_CUDA
-extern template class tnlSharedVector< float, tnlCuda, int >;
-extern template class tnlSharedVector< double, tnlCuda, int >;
-extern template class tnlSharedVector< float, tnlCuda, long int >;
-extern template class tnlSharedVector< double, tnlCuda, long int >;
-#endif
-
-#endif
-
 #endif /* TNLSHAREDVECTOR_H_ */
diff --git a/src/implementation/core/arrays/CMakeLists.txt b/src/implementation/core/arrays/CMakeLists.txt
index 69b3be5c4f..90bb09f549 100755
--- a/src/implementation/core/arrays/CMakeLists.txt
+++ b/src/implementation/core/arrays/CMakeLists.txt
@@ -19,6 +19,7 @@ IF( BUILD_CUDA )
         ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu
         ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu
         ${CURRENT_DIR}/tnlArray_impl.cu
+        ${CURRENT_DIR}/tnlSharedArray_impl.cu
         ${CURRENT_DIR}/tnlMultiArray_impl.cu
         PARENT_SCOPE )
 ELSE()
diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.cu b/src/implementation/core/arrays/tnlSharedArray_impl.cu
new file mode 100644
index 0000000000..51d2eae92b
--- /dev/null
+++ b/src/implementation/core/arrays/tnlSharedArray_impl.cu
@@ -0,0 +1,29 @@
+/***************************************************************************
+                          tnlSharedArray_impl.cu  -  description
+                             -------------------
+    begin                : Jan 20, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <core/arrays/tnlSharedArray.h>
+
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
+
+#ifdef HAVE_CUDA
+template class tnlSharedArray< float, tnlCuda, int >;
+template class tnlSharedArray< double, tnlCuda, int >;
+template class tnlSharedArray< float, tnlCuda, long int >;
+template class tnlSharedArray< double, tnlCuda, long int >;
+#endif
+
+#endif
\ No newline at end of file
diff --git a/src/implementation/core/arrays/tnlSharedArray_impl.h b/src/implementation/core/arrays/tnlSharedArray_impl.h
index 134b4c984c..1b3372ae8a 100644
--- a/src/implementation/core/arrays/tnlSharedArray_impl.h
+++ b/src/implementation/core/arrays/tnlSharedArray_impl.h
@@ -409,16 +409,18 @@ ostream& operator << ( ostream& str, const tnlSharedArray< Element, Device, Inde
 
 #ifdef TEMPLATE_EXPLICIT_INSTANTIATION
 
-extern template class tnlSharedArray< float, tnlHost, int >;
+// TODO: this does not work with CUDA 5.5 - fix it later
+
+/*extern template class tnlSharedArray< float, tnlHost, int >;
 extern template class tnlSharedArray< double, tnlHost, int >;
 extern template class tnlSharedArray< float, tnlHost, long int >;
-extern template class tnlSharedArray< double, tnlHost, long int >;
+extern template class tnlSharedArray< double, tnlHost, long int >;*/
 
 #ifdef HAVE_CUDA
-extern template class tnlSharedArray< float, tnlCuda, int >;
+/*extern template class tnlSharedArray< float, tnlCuda, int >;
 extern template class tnlSharedArray< double, tnlCuda, int >;
 extern template class tnlSharedArray< float, tnlCuda, long int >;
-extern template class tnlSharedArray< double, tnlCuda, long int >;
+extern template class tnlSharedArray< double, tnlCuda, long int >;*/
 #endif
 
 #endif
diff --git a/src/implementation/core/vectors/tnlSharedVector_impl.h b/src/implementation/core/vectors/tnlSharedVector_impl.h
index 152d03a3bf..c345157971 100644
--- a/src/implementation/core/vectors/tnlSharedVector_impl.h
+++ b/src/implementation/core/vectors/tnlSharedVector_impl.h
@@ -372,4 +372,22 @@ void tnlSharedVector< Real, Device, Index > :: computeExclusivePrefixSum( const
    tnlVectorOperations< Device >::computeExclusivePrefixSum( *this, begin, end );
 }
 
+
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
+
+extern template class tnlSharedVector< float, tnlHost, int >;
+extern template class tnlSharedVector< double, tnlHost, int >;
+extern template class tnlSharedVector< float, tnlHost, long int >;
+extern template class tnlSharedVector< double, tnlHost, long int >;
+
+#ifdef HAVE_CUDA
+// TODO: fix this - it does not work with CUDA 5.5
+/*extern template class tnlSharedVector< float, tnlCuda, int >;
+extern template class tnlSharedVector< double, tnlCuda, int >;
+extern template class tnlSharedVector< float, tnlCuda, long int >;
+extern template class tnlSharedVector< double, tnlCuda, long int >;*/
+#endif
+
+#endif
+
 #endif /* TNLSHAREDVECTOR_H_IMPLEMENTATION */
diff --git a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h
index 38742ae43d..9d97fe4d24 100644
--- a/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h
+++ b/src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h
@@ -1062,9 +1062,10 @@ typename Vector::RealType tnlChunkedEllpackMatrix< Real, Device, Index >::chunkV
 template< typename Real,
           typename Device,
           typename Index >
-   template< typename Vector >
-__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const Vector* inVector,
-                                                                                           Vector* outVector,
+   template< typename InVector,
+             typename OutVector >
+__device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVectorProduct( const InVector* inVector,
+                                                                                           OutVector* outVector,
                                                                                            int sliceIdx  ) const
 {
    tnlStaticAssert( DeviceType::DeviceType == tnlCudaDevice, );
diff --git a/src/implementation/matrices/tnlMatrixReader_impl.h b/src/implementation/matrices/tnlMatrixReader_impl.h
index 994f8a0b25..c5ce0d7c6b 100644
--- a/src/implementation/matrices/tnlMatrixReader_impl.h
+++ b/src/implementation/matrices/tnlMatrixReader_impl.h
@@ -44,6 +44,15 @@ template< typename Matrix >
 bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file,
                                              Matrix& matrix,
                                              bool verbose )
+{
+   return tnlMatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose );
+}
+
+template< typename Matrix >
+bool tnlMatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
+                                                       Matrix& matrix,
+                                                       typename Matrix::RowLengthsVector& rowLengths,
+                                                       bool verbose )
 {
    IndexType rows, columns;
    bool symmetricMatrix( false );
@@ -51,7 +60,7 @@ bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file,
    if( ! readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) )
       return false;
 
-   tnlVector< int, tnlHost, int > rowLengths;
+
    if( ! matrix.setDimensions( rows, columns ) ||
        ! rowLengths.setSize( rows ) )
    {
@@ -371,5 +380,47 @@ bool tnlMatrixReader< Matrix >::parseMtxLineWithElement( const tnlString& line,
    return true;
 }
 
+template<>
+class tnlMatrixReaderDeviceDependentCode< tnlHost >
+{
+   public:
+
+   template< typename Matrix >
+   static bool readMtxFile( std::istream& file,
+                            Matrix& matrix,
+                            bool verbose )
+   {
+      typename Matrix::RowLengthsVector rowLengths;
+      return tnlMatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose );
+   }
+};
+
+template<>
+class tnlMatrixReaderDeviceDependentCode< tnlCuda >
+{
+   public:
+
+   template< typename Matrix >
+   static bool readMtxFile( std::istream& file,
+                            Matrix& matrix,
+                            bool verbose )
+   {
+      typedef typename Matrix::HostType HostMatrixType;
+      typedef typename HostMatrixType::RowLengthsVector RowLengthsVector;
+
+      HostMatrixType hostMatrix;
+      RowLengthsVector rowLengthsVector;
+      if( ! tnlMatrixReader< HostMatrixType >::readMtxFileHostMatrix( file, hostMatrix, rowLengthsVector, verbose ) )
+         return false;
+
+      typename Matrix::RowLengthsVector cudaRowLengthsVector;
+      cudaRowLengthsVector.setLike( rowLengthsVector );
+      cudaRowLengthsVector = rowLengthsVector;
+      if( ! matrix.copyFrom( hostMatrix, cudaRowLengthsVector ) )
+         return false;
+      return true;
+   }
+};
+
 
 #endif /* TNLMATRIXREADER_IMPL_H_ */
diff --git a/src/matrices/tnlCSRMatrix.h b/src/matrices/tnlCSRMatrix.h
index d3d67c91c2..efdf540266 100644
--- a/src/matrices/tnlCSRMatrix.h
+++ b/src/matrices/tnlCSRMatrix.h
@@ -33,6 +33,10 @@ class tnlCSRMatrix : public tnlSparseMatrix< Real, Device, Index >
    typedef Device DeviceType;
    typedef Index IndexType;
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector;
+   typedef tnlCSRMatrix< Real, Device, Index > ThisType;
+   typedef tnlCSRMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlCSRMatrix< Real, tnlCuda, Index > CudaType;
+
 
    enum SPMVCudaKernel { scalar, vector, hybrid };
 
diff --git a/src/matrices/tnlChunkedEllpackMatrix.h b/src/matrices/tnlChunkedEllpackMatrix.h
index 87cace2d49..1ee65c1ef7 100644
--- a/src/matrices/tnlChunkedEllpackMatrix.h
+++ b/src/matrices/tnlChunkedEllpackMatrix.h
@@ -62,6 +62,9 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
    typedef Index IndexType;
    typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo;
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >:: RowLengthsVector RowLengthsVector;
+   typedef tnlChunkedEllpackMatrix< Real, Device, Index > ThisType;
+   typedef tnlChunkedEllpackMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlChunkedEllpackMatrix< Real, tnlCuda, Index > CudaType;
 
    tnlChunkedEllpackMatrix();
 
@@ -184,9 +187,10 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
                                                const Vector& vector ) const;
 
 #ifdef HAVE_CUDA
-   template< typename Vector >
-   __device__ void computeSliceVectorProduct( const Vector* inVector,
-                                              Vector* outVector,
+   template< typename InVector,
+             typename OutVector >
+   __device__ void computeSliceVectorProduct( const InVector* inVector,
+                                              OutVector* outVector,
                                               int gridIdx  ) const;
 #endif
 
diff --git a/src/matrices/tnlDenseMatrix.h b/src/matrices/tnlDenseMatrix.h
index d991c5aefa..4b3989803f 100644
--- a/src/matrices/tnlDenseMatrix.h
+++ b/src/matrices/tnlDenseMatrix.h
@@ -37,6 +37,9 @@ class tnlDenseMatrix : public tnlMatrix< Real, Device, Index >
    typedef Index IndexType;
    typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector;
    typedef tnlDenseMatrix< Real, Device, Index > ThisType;
+   typedef tnlDenseMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlDenseMatrix< Real, tnlCuda, Index > CudaType;
+
 
    tnlDenseMatrix();
 
diff --git a/src/matrices/tnlEllpackMatrix.h b/src/matrices/tnlEllpackMatrix.h
index be90ae6f71..e56b1922ff 100644
--- a/src/matrices/tnlEllpackMatrix.h
+++ b/src/matrices/tnlEllpackMatrix.h
@@ -36,6 +36,9 @@ class tnlEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef tnlEllpackMatrix< Real, Device, Index > ThisType;
+   typedef tnlEllpackMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlEllpackMatrix< Real, tnlCuda, Index > CudaType;
+
 
    tnlEllpackMatrix();
 
diff --git a/src/matrices/tnlMatrixReader.h b/src/matrices/tnlMatrixReader.h
index 011def3468..a2ba34efea 100644
--- a/src/matrices/tnlMatrixReader.h
+++ b/src/matrices/tnlMatrixReader.h
@@ -22,6 +22,10 @@
 #include <core/tnlString.h>
 #include <core/vectors/tnlVector.h>
 
+template< typename Device >
+class tnlMatrixReaderDeviceDependentCode
+{};
+
 template< typename Matrix >
 class tnlMatrixReader
 {
@@ -38,6 +42,12 @@ class tnlMatrixReader
                             Matrix& matrix,
                             bool verbose = false );
 
+   static bool readMtxFileHostMatrix( std::istream& file,
+                                      Matrix& matrix,
+                                      typename Matrix::RowLengthsVector& rowLengths,
+                                      bool verbose );
+
+
    static bool verifyMtxFile( std::istream& file,
                               const Matrix& matrix,
                               bool verbose = false );
@@ -74,10 +84,10 @@ class tnlMatrixReader
                                         IndexType& row,
                                         IndexType& column,
                                         RealType& value );
-
 };
 
 
+
 #include <implementation/matrices/tnlMatrixReader_impl.h>
 
 #endif /* TNLMATRIXREADER_H_ */
diff --git a/src/matrices/tnlMultidiagonalMatrix.h b/src/matrices/tnlMultidiagonalMatrix.h
index d0ecf615c4..702b448100 100644
--- a/src/matrices/tnlMultidiagonalMatrix.h
+++ b/src/matrices/tnlMultidiagonalMatrix.h
@@ -34,6 +34,9 @@ class tnlMultidiagonalMatrix : public tnlMatrix< Real, Device, Index >
    typedef Index IndexType;
    typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector;
    typedef tnlMultidiagonalMatrix< Real, Device, Index > ThisType;
+   typedef tnlMultidiagonalMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlMultidiagonalMatrix< Real, tnlCuda, Index > CudaType;
+
 
    tnlMultidiagonalMatrix();
 
diff --git a/src/matrices/tnlSlicedEllpackMatrix.h b/src/matrices/tnlSlicedEllpackMatrix.h
index 2530868d27..6184473d8e 100644
--- a/src/matrices/tnlSlicedEllpackMatrix.h
+++ b/src/matrices/tnlSlicedEllpackMatrix.h
@@ -54,6 +54,9 @@ class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
    typedef typename tnlSparseMatrix< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
    typedef tnlSlicedEllpackMatrix< Real, Device, Index > ThisType;
+   typedef tnlSlicedEllpackMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlSlicedEllpackMatrix< Real, tnlCuda, Index > CudaType;
+
 
    tnlSlicedEllpackMatrix();
 
diff --git a/src/matrices/tnlTridiagonalMatrix.h b/src/matrices/tnlTridiagonalMatrix.h
index 7bcafd79e0..b7f77ba2fd 100644
--- a/src/matrices/tnlTridiagonalMatrix.h
+++ b/src/matrices/tnlTridiagonalMatrix.h
@@ -36,6 +36,9 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index >
    typedef Index IndexType;
    typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector;
    typedef tnlTridiagonalMatrix< Real, Device, Index > ThisType;
+   typedef tnlTridiagonalMatrix< Real, tnlHost, Index > HostType;
+   typedef tnlTridiagonalMatrix< Real, tnlCuda, Index > CudaType;
+
 
    tnlTridiagonalMatrix();
 
diff --git a/src/solvers/linear/krylov/tnlGMRESSolver.h b/src/solvers/linear/krylov/tnlGMRESSolver.h
index 0fbf069db5..4ccee98ee4 100644
--- a/src/solvers/linear/krylov/tnlGMRESSolver.h
+++ b/src/solvers/linear/krylov/tnlGMRESSolver.h
@@ -122,10 +122,11 @@ extern template class tnlGMRESSolver< tnlMultiDiagonalMatrix< double, tnlHost, l
 
 
 #ifdef HAVE_CUDA
-extern template class tnlGMRESSolver< tnlCSRMatrix< float,  tnlCuda, int > >;
+// TODO: fix this - does not work with CUDA 5.5
+/*extern template class tnlGMRESSolver< tnlCSRMatrix< float,  tnlCuda, int > >;
 extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, int > >;
 extern template class tnlGMRESSolver< tnlCSRMatrix< float,  tnlCuda, long int > >;
-extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >;
+extern template class tnlGMRESSolver< tnlCSRMatrix< double, tnlCuda, long int > >;*/
 
 /*extern template class tnlGMRESSolver< tnlEllpackMatrix< float,  tnlCuda, int > >;
 extern template class tnlGMRESSolver< tnlEllpackMatrix< double, tnlCuda, int > >;
diff --git a/tests/benchmarks/tnl-benchmark-linear-solvers.h b/tests/benchmarks/tnl-benchmark-linear-solvers.h
index 82f506dee8..1d5fa65232 100644
--- a/tests/benchmarks/tnl-benchmark-linear-solvers.h
+++ b/tests/benchmarks/tnl-benchmark-linear-solvers.h
@@ -34,6 +34,9 @@
 #include <matrices/tnlChunkedEllpackMatrix.h>
 #include <matrices/tnlMatrixReader.h>
 #include <solvers/linear/krylov/tnlGMRESSolver.h>
+#include <solvers/linear/krylov/tnlCGSolver.h>
+#include <solvers/linear/krylov/tnlBICGStabSolver.h>
+#include <solvers/linear/krylov/tnlTFQMRSolver.h>
 #include <solvers/linear/tnlLinearResidueGetter.h>
 #include <solvers/tnlIterativeSolverMonitor.h>
 
@@ -119,6 +122,15 @@ bool resolveLinearSolver( const tnlParameterContainer& parameters )
    if( solver == "gmres" )
       return benchmarkSolver< tnlGMRESSolver< Matrix > >( parameters, matrix );
 
+   if( solver == "cg" )
+      return benchmarkSolver< tnlCGSolver< Matrix > >( parameters, matrix );
+
+   if( solver == "bicgstab" )
+      return benchmarkSolver< tnlBICGStabSolver< Matrix > >( parameters, matrix );
+
+   if( solver == "tfqmr" )
+      return benchmarkSolver< tnlTFQMRSolver< Matrix > >( parameters, matrix );
+
    cerr << "Unknown solver " << solver << "." << endl;
    return false;
 }
@@ -162,8 +174,8 @@ bool resolveDevice( const tnlParameterContainer& parameters )
    if( device == "host" )
       return resolveMatrixFormat< Real, tnlHost >( parameters );
 
-   //if( device == "cuda" )
-   //   return resolveMatrixFormat< Real, tnlCuda >( parameters );
+   if( device == "cuda" )
+      return resolveMatrixFormat< Real, tnlCuda >( parameters );
 
    cerr << "Uknown device " << device << "." << endl;
    return false;
-- 
GitLab