From e200f4b2e2bb65d7eae666892f6218da3b6a299e Mon Sep 17 00:00:00 2001
From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz>
Date: Mon, 25 Mar 2013 22:36:10 +0100
Subject: [PATCH] Debugging of explicit instantiations for CUDA memory
 operations.

---
 buildAll                                      |  2 +-
 src/implementation/core/CMakeLists.txt        |  9 ++-
 src/implementation/core/cuda/CMakeLists.txt   | 19 +++---
 src/implementation/core/memory-operations.h   | 51 ++++++++++++++-
 .../core/memory-operations_impl.cu            | 62 +++++++++++++++++++
 src/implementation/solvers/CMakeLists.txt     | 15 ++++-
 .../solvers/linear/CMakeLists.txt             | 11 +++-
 .../solvers/linear/krylov/CMakeLists.txt      | 11 +++-
 .../solvers/linear/stationary/CMakeLists.txt  | 10 ++-
 src/matrix/CMakeLists.txt                     | 11 +++-
 10 files changed, 173 insertions(+), 28 deletions(-)

diff --git a/buildAll b/buildAll
index 3986a9ea1d..d553862bd5 100755
--- a/buildAll
+++ b/buildAll
@@ -2,7 +2,7 @@
 
 TARGET=TNL
 INSTALL_PREFIX=${HOME}/local
-WITH_CUDA=no
+WITH_CUDA=yes
 WITH_CUSPARSE=no
 CUDA_ARCHITECTURE=2.0
 VERBOSE=1
diff --git a/src/implementation/core/CMakeLists.txt b/src/implementation/core/CMakeLists.txt
index 2ad4583904..61cb7f39f7 100755
--- a/src/implementation/core/CMakeLists.txt
+++ b/src/implementation/core/CMakeLists.txt
@@ -33,7 +33,9 @@ set( common_SOURCES
      ${CURRENT_DIR}/tnlMultiArray_impl.cpp
      ${CURRENT_DIR}/tnlMultiVector_impl.cpp
      ${CURRENT_DIR}/tnlSharedVector_impl.cpp
-     ${CURRENT_DIR}/tnlVector_impl.cpp )     
+     ${CURRENT_DIR}/tnlVector_impl.cpp 
+     ${CURRENT_DIR}/tnlArray_impl.cpp
+     ${CURRENT_DIR}/tnlHost_impl.cpp )       
 
 IF( BUILD_CUDA )
    set( tnl_implementation_core_CUDA__SOURCES
@@ -44,15 +46,12 @@ IF( BUILD_CUDA )
         ${CURRENT_DIR}/tnlVector_impl.cu 
         PARENT_SCOPE )
 ENDIF()    
+
 set( tnl_implementation_core_SOURCES     
      ${tnl_implementation_core_cuda_SOURCES}
      ${common_SOURCES}
      ${CURRENT_DIR}/memory-operations_impl.cpp
-     ${CURRENT_DIR}/tnlArray_impl.cpp
-     ${CURRENT_DIR}/tnlHost_impl.cpp
      PARENT_SCOPE )
-    
-    
         
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/core )
 
diff --git a/src/implementation/core/cuda/CMakeLists.txt b/src/implementation/core/cuda/CMakeLists.txt
index 54f83098f4..85f49cf69d 100755
--- a/src/implementation/core/cuda/CMakeLists.txt
+++ b/src/implementation/core/cuda/CMakeLists.txt
@@ -1,21 +1,18 @@
 SET( headers cuda-reduction_impl.h )
 
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core/cuda )
-
+set( common_SOURCES ${CURRENT_DIR}/device-check.cpp ) 
 IF( BUILD_CUDA )
    set( tnl_implementation_core_cuda_CUDA__SOURCES
+        ${common_SOURCES}
         ${CURRENT_DIR}/cuda-reduction_impl.cu
         PARENT_SCOPE )        
-else() 
-   set( tnl_implementation_core_cuda_SOURCES
-        ${CURRENT_DIR}/cuda-reduction_impl.cpp
-        PARENT_SCOPE )               
-endif()        
-
-set( tnl_implementation_core_cuda_SOURCES 
-     ${tnl_implementation_core_cuda_SOURCES}
-     ${CURRENT_DIR}/device-check.cpp
-     PARENT_SCOPE )
+endif() 
 
+set( tnl_implementation_core_cuda_SOURCES
+     ${common_SOURCES}
+     ${CURRENT_DIR}/cuda-reduction_impl.cpp     
+     PARENT_SCOPE )               
+        
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/core/cuda )
 
diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 09a721996b..5c62312303 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -43,6 +43,7 @@ bool allocateMemoryCuda( Element*& data,
       data = 0;
    return checkCudaDevice;
 #else
+   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
    return false;
 #endif
 }
@@ -61,7 +62,7 @@ bool freeMemoryCuda( Element* data )
       cudaFree( data );
       return checkCudaDevice;
 #else
-   cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
    return true;
 #endif
 }
@@ -111,7 +112,7 @@ bool setMemoryCuda( Element* data,
 
       return checkCudaDevice;
 #else
-      cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+      cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
       return false;
 #endif
 
@@ -264,4 +265,50 @@ bool compareMemoryCuda( const Element* deviceData1,
 #endif
 }
 
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
+// Explicit instantiation declarations; the definitions are in memory-operations_impl.cu.
+#ifdef HAVE_CUDA
+extern template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const long int size );
+
+#endif /* HAVE_CUDA */
+#endif /* TEMPLATE_EXPLICIT_INSTANTIATION */
+
 #endif /* MEMORYFUNCTIONS_H_ */
diff --git a/src/implementation/core/memory-operations_impl.cu b/src/implementation/core/memory-operations_impl.cu
index e69de29bb2..20e90befee 100644
--- a/src/implementation/core/memory-operations_impl.cu
+++ b/src/implementation/core/memory-operations_impl.cu
@@ -0,0 +1,62 @@
+/***************************************************************************
+                          memory-operations_impl.cu  -  description
+                             -------------------
+    begin                : Mar 24, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <implementation/core/memory-operations.h>
+
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
+// Explicit instantiation definitions matching the extern template declarations in memory-operations.h.
+template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const long int size );
+
+#endif
diff --git a/src/implementation/solvers/CMakeLists.txt b/src/implementation/solvers/CMakeLists.txt
index 2f5de5cbe1..d210c36850 100755
--- a/src/implementation/solvers/CMakeLists.txt
+++ b/src/implementation/solvers/CMakeLists.txt
@@ -8,11 +8,22 @@ SET( headers tnlIterativeSolver_impl.h
              tnlSolverStarter_impl.h
              tnlSolverInitiator_impl.h )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers ) 
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers )
+set( common_SOURCES ${CURRENT_DIR}/tnlIterativeSolver_impl.cpp )
+
+if( BUILD_CUDA)
+      set( tnl_implementation_solvers_CUDA__SOURCES
+        ${tnl_implementation_solvers_linear_CUDA__SOURCES}
+        ${tnl_implementation_solvers_ode_CUDA__SOURCES}
+        ${tnl_implementation_solvers_pde_CUDA__SOURCES}
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()
+
 set( tnl_implementation_solvers_SOURCES
      ${tnl_implementation_solvers_linear_SOURCES}
      ${tnl_implementation_solvers_ode_SOURCES}
-     ${CURRENT_DIR}/tnlIterativeSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
 
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers )
diff --git a/src/implementation/solvers/linear/CMakeLists.txt b/src/implementation/solvers/linear/CMakeLists.txt
index 98771f090d..6fde550e57 100755
--- a/src/implementation/solvers/linear/CMakeLists.txt
+++ b/src/implementation/solvers/linear/CMakeLists.txt
@@ -4,9 +4,18 @@ ADD_SUBDIRECTORY( stationary )
 SET( headers tnlLinearResidueGetter_impl.h
    )
    
-set( tnl_implementation_solvers_linear_SOURCES
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear )
+# Export the combined krylov + stationary source lists to the parent directory.
+set( tnl_implementation_solvers_linear_SOURCES
      ${tnl_implementation_solvers_linear_krylov_SOURCES}
      ${tnl_implementation_solvers_linear_stationary_SOURCES}
      PARENT_SCOPE )
+
+if( BUILD_CUDA)
+   set( tnl_implementation_solvers_linear_CUDA__SOURCES
+        ${tnl_implementation_solvers_linear_krylov_CUDA__SOURCES}
+        ${tnl_implementation_solvers_linear_stationary_CUDA__SOURCES}
+        PARENT_SCOPE )
+endif()
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear )
\ No newline at end of file
diff --git a/src/implementation/solvers/linear/krylov/CMakeLists.txt b/src/implementation/solvers/linear/krylov/CMakeLists.txt
index daa9d4a83c..4b99141c0c 100644
--- a/src/implementation/solvers/linear/krylov/CMakeLists.txt
+++ b/src/implementation/solvers/linear/krylov/CMakeLists.txt
@@ -3,9 +3,16 @@ SET( headers tnlCGSolver_impl.h
              tnlGMRESSolver_impl.h
    )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/krylov )    
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/krylov )
+set( common_SOURCES ${CURRENT_DIR}/tnlGMRESSolver_impl.cpp )     
 set( tnl_implementation_solvers_linear_krylov_SOURCES
-     ${CURRENT_DIR}/tnlGMRESSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
+
+if( BUILD_CUDA)
+   set( tnl_implementation_solvers_linear_krylov_CUDA__SOURCES
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif() 
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear/krylov )
diff --git a/src/implementation/solvers/linear/stationary/CMakeLists.txt b/src/implementation/solvers/linear/stationary/CMakeLists.txt
index b6e94351a9..67064405e7 100644
--- a/src/implementation/solvers/linear/stationary/CMakeLists.txt
+++ b/src/implementation/solvers/linear/stationary/CMakeLists.txt
@@ -2,8 +2,16 @@ SET( headers tnlSORSolver_impl.h
    )
    
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/stationary )    
+set( common_SOURCES ${CURRENT_DIR}/tnlSORSolver_impl.cpp )
+
 set( tnl_implementation_solvers_linear_stationary_SOURCES
-     ${CURRENT_DIR}/tnlSORSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
+     
+if( BUILD_CUDA)
+   set( tnl_implementation_solvers_linear_stationary_CUDA__SOURCES
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()     
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear/stationary )
diff --git a/src/matrix/CMakeLists.txt b/src/matrix/CMakeLists.txt
index 53140264d5..f690d83239 100755
--- a/src/matrix/CMakeLists.txt
+++ b/src/matrix/CMakeLists.txt
@@ -13,11 +13,16 @@ SET( headers tnlAdaptiveRgCSRMatrix.h
 	)
 	
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/matrix )
+set( common_SOURCES ${CURRENT_DIR}/tnlMatrix.cpp )
+
 SET( tnl_matrix_SOURCES 
-     ${CURRENT_DIR}/tnlMatrix.cpp 
+     ${common_SOURCES}
      PARENT_SCOPE )
 
-#SET( libtnlmatrixincludedir  ${TNL_INCLUDE_DIR}/matrix )
-#SET( libtnlmatrixinclude_HEADERS ${headers} )
+if( BUILD_CUDA )
+   SET( tnl_matrix_CUDA__SOURCES 
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()
 
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/matrix )
\ No newline at end of file
-- 
GitLab