diff --git a/buildAll b/buildAll
index 3986a9ea1db990dc0960376c7c614a5f44b7ff45..d553862bd5fdf6878b16afb67f934c1424acfe75 100755
--- a/buildAll
+++ b/buildAll
@@ -2,7 +2,7 @@
 
 TARGET=TNL
 INSTALL_PREFIX=${HOME}/local
-WITH_CUDA=no
+WITH_CUDA=yes
 WITH_CUSPARSE=no
 CUDA_ARCHITECTURE=2.0
 VERBOSE=1
diff --git a/src/implementation/core/CMakeLists.txt b/src/implementation/core/CMakeLists.txt
index 2ad45839048ae18618d506adb3a25d07e6572d70..61cb7f39f7925798267f1e9dccd69a35bbdbc14e 100755
--- a/src/implementation/core/CMakeLists.txt
+++ b/src/implementation/core/CMakeLists.txt
@@ -33,7 +33,9 @@ set( common_SOURCES
      ${CURRENT_DIR}/tnlMultiArray_impl.cpp
      ${CURRENT_DIR}/tnlMultiVector_impl.cpp
      ${CURRENT_DIR}/tnlSharedVector_impl.cpp
-     ${CURRENT_DIR}/tnlVector_impl.cpp )     
+     ${CURRENT_DIR}/tnlVector_impl.cpp 
+     ${CURRENT_DIR}/tnlArray_impl.cpp
+     ${CURRENT_DIR}/tnlHost_impl.cpp )       
 
 IF( BUILD_CUDA )
    set( tnl_implementation_core_CUDA__SOURCES
@@ -44,15 +46,12 @@ IF( BUILD_CUDA )
         ${CURRENT_DIR}/tnlVector_impl.cu 
         PARENT_SCOPE )
 ENDIF()    
+
 set( tnl_implementation_core_SOURCES     
      ${tnl_implementation_core_cuda_SOURCES}
      ${common_SOURCES}
      ${CURRENT_DIR}/memory-operations_impl.cpp
-     ${CURRENT_DIR}/tnlArray_impl.cpp
-     ${CURRENT_DIR}/tnlHost_impl.cpp
      PARENT_SCOPE )
-    
-    
         
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/core )
 
diff --git a/src/implementation/core/cuda/CMakeLists.txt b/src/implementation/core/cuda/CMakeLists.txt
index 54f83098f4c7bb940391702a4b68a08c87bbffc6..85f49cf69dbaa93f75856e76debe5ea04b987f6d 100755
--- a/src/implementation/core/cuda/CMakeLists.txt
+++ b/src/implementation/core/cuda/CMakeLists.txt
@@ -1,21 +1,18 @@
 SET( headers cuda-reduction_impl.h )
 
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/core/cuda )
-
+set( common_SOURCES ${CURRENT_DIR}/device-check.cpp ) 
 IF( BUILD_CUDA )
    set( tnl_implementation_core_cuda_CUDA__SOURCES
+        ${common_SOURCES}
         ${CURRENT_DIR}/cuda-reduction_impl.cu
         PARENT_SCOPE )        
-else() 
-   set( tnl_implementation_core_cuda_SOURCES
-        ${CURRENT_DIR}/cuda-reduction_impl.cpp
-        PARENT_SCOPE )               
-endif()        
-
-set( tnl_implementation_core_cuda_SOURCES 
-     ${tnl_implementation_core_cuda_SOURCES}
-     ${CURRENT_DIR}/device-check.cpp
-     PARENT_SCOPE )
+endif() 
 
+set( tnl_implementation_core_cuda_SOURCES   # set regardless of BUILD_CUDA (placed after the endif()) and exported to the parent scope
+     ${common_SOURCES}
+     ${CURRENT_DIR}/cuda-reduction_impl.cpp     
+     PARENT_SCOPE )               
+        
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/core/cuda )
 
diff --git a/src/implementation/core/memory-operations.h b/src/implementation/core/memory-operations.h
index 09a721996b0ae099bc98d6c0fb7f974baa5a60fc..5c623123032e2e3a51c416a7c81a2d3f8587e94c 100644
--- a/src/implementation/core/memory-operations.h
+++ b/src/implementation/core/memory-operations.h
@@ -43,6 +43,7 @@ bool allocateMemoryCuda( Element*& data,
       data = 0;
    return checkCudaDevice;
 #else
+   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
    return false;
 #endif
 }
@@ -61,7 +62,7 @@ bool freeMemoryCuda( Element* data )
       cudaFree( data );
       return checkCudaDevice;
 #else
-   cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+   cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
    return true;
 #endif
 }
@@ -111,7 +112,7 @@ bool setMemoryCuda( Element* data,
 
       return checkCudaDevice;
 #else
-      cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
+      cerr << "CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
       return false;
 #endif
 
@@ -264,4 +265,50 @@ bool compareMemoryCuda( const Element* deviceData1,
 #endif
 }
 
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION /* explicit-instantiation declarations; the matching definitions are in memory-operations_impl.cu */
+
+#ifdef HAVE_CUDA
+extern template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const int size );
+
+extern template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const long int size );
+
+extern template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const long int size );
+
+#endif /* HAVE_CUDA */
+#endif /* TEMPLATE_EXPLICIT_INSTANTIATION */
+
 #endif /* MEMORYFUNCTIONS_H_ */
diff --git a/src/implementation/core/memory-operations_impl.cu b/src/implementation/core/memory-operations_impl.cu
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..20e90befeeb7867d61afe6e7548a4367dccc9800 100644
--- a/src/implementation/core/memory-operations_impl.cu
+++ b/src/implementation/core/memory-operations_impl.cu
@@ -0,0 +1,62 @@
+/***************************************************************************
+                          memory-operations_impl.cu  -  description
+                             -------------------
+    begin                : Mar 24, 2013
+    copyright            : (C) 2013 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <implementation/core/memory-operations.h>
+
+#ifdef TEMPLATE_EXPLICIT_INSTANTIATION /* explicit instantiations of copyMemoryCudaToHost: char, int, long int, float, double elements, each with int and long int size */
+
+template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const int size );
+                                    
+template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const int size );
+
+template bool copyMemoryCudaToHost( char* destination,
+                                    const char* source,
+                                    const long int size );
+                                    
+template bool copyMemoryCudaToHost( int* destination,
+                                    const int* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( long int* destination,
+                                    const long int* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( float* destination,
+                                    const float* source,
+                                    const long int size );
+
+template bool copyMemoryCudaToHost( double* destination,
+                                    const double* source,
+                                    const long int size );
+                                                                        
+#endif /* TEMPLATE_EXPLICIT_INSTANTIATION */
\ No newline at end of file
diff --git a/src/implementation/solvers/CMakeLists.txt b/src/implementation/solvers/CMakeLists.txt
index 2f5de5cbe1b45e8bed29123c880f47219906763a..d210c368503dd48f42a52a07f3632537454c85da 100755
--- a/src/implementation/solvers/CMakeLists.txt
+++ b/src/implementation/solvers/CMakeLists.txt
@@ -8,11 +8,22 @@ SET( headers tnlIterativeSolver_impl.h
              tnlSolverStarter_impl.h
              tnlSolverInitiator_impl.h )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers ) 
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers )
+set( common_SOURCES ${CURRENT_DIR}/tnlIterativeSolver_impl.cpp )
+
+if( BUILD_CUDA)   # only when BUILD_CUDA is set: export the combined CUDA source list to the parent scope
+      set( tnl_implementation_solvers_CUDA__SOURCES
+        ${tnl_implementation_solvers_linear_CUDA__SOURCES}
+        ${tnl_implementation_solvers_ode_CUDA__SOURCES}
+        ${tnl_implementation_solvers_pde_CUDA__SOURCES}   # NOTE(review): tnl_implementation_solvers_SOURCES below has no pde entry - confirm the asymmetry is intended
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()
+
 set( tnl_implementation_solvers_SOURCES
      ${tnl_implementation_solvers_linear_SOURCES}
      ${tnl_implementation_solvers_ode_SOURCES}
-     ${CURRENT_DIR}/tnlIterativeSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
 
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers )
diff --git a/src/implementation/solvers/linear/CMakeLists.txt b/src/implementation/solvers/linear/CMakeLists.txt
index 98771f090dd9b60e2df445a85adfcd405e862834..6fde550e5737392673a7f7ce6bae20849e78660b 100755
--- a/src/implementation/solvers/linear/CMakeLists.txt
+++ b/src/implementation/solvers/linear/CMakeLists.txt
@@ -4,9 +4,18 @@ ADD_SUBDIRECTORY( stationary )
 SET( headers tnlLinearResidueGetter_impl.h
    )
    
-set( tnl_implementation_solvers_linear_SOURCES
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear )
+   
+set( tnl_implementation_solvers_linear_SOURCES   # aggregate of the krylov and stationary lists; the parent solvers/CMakeLists.txt reads it under exactly this name
      ${tnl_implementation_solvers_linear_krylov_SOURCES}
      ${tnl_implementation_solvers_linear_stationary_SOURCES}
      PARENT_SCOPE )
+
+if( BUILD_CUDA)   # only when BUILD_CUDA is set: merge the krylov and stationary CUDA lists and export them to the parent scope
+   set( tnl_implementation_solvers_linear_CUDA__SOURCES
+        ${tnl_implementation_solvers_linear_krylov_CUDA__SOURCES}
+        ${tnl_implementation_solvers_linear_stationary_CUDA__SOURCES}
+        PARENT_SCOPE )
+endif()
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear )
\ No newline at end of file
diff --git a/src/implementation/solvers/linear/krylov/CMakeLists.txt b/src/implementation/solvers/linear/krylov/CMakeLists.txt
index daa9d4a83c8b00f585e34a99e6759884ea4457cb..4b99141c0c25f54f69a419df5071f0b375ea3dcf 100644
--- a/src/implementation/solvers/linear/krylov/CMakeLists.txt
+++ b/src/implementation/solvers/linear/krylov/CMakeLists.txt
@@ -3,9 +3,16 @@ SET( headers tnlCGSolver_impl.h
              tnlGMRESSolver_impl.h
    )
 
-SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/krylov )    
+SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/krylov )
+set( common_SOURCES ${CURRENT_DIR}/tnlGMRESSolver_impl.cpp )     
 set( tnl_implementation_solvers_linear_krylov_SOURCES
-     ${CURRENT_DIR}/tnlGMRESSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
+
+if( BUILD_CUDA)   # only when BUILD_CUDA is set: export the same common_SOURCES under the CUDA list name
+   set( tnl_implementation_solvers_linear_krylov_CUDA__SOURCES
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif() 
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear/krylov )
diff --git a/src/implementation/solvers/linear/stationary/CMakeLists.txt b/src/implementation/solvers/linear/stationary/CMakeLists.txt
index b6e94351a9c29fd53580a31c1360db06e74a1398..67064405e733d6be4511b93a208270da5de6ceb2 100644
--- a/src/implementation/solvers/linear/stationary/CMakeLists.txt
+++ b/src/implementation/solvers/linear/stationary/CMakeLists.txt
@@ -2,8 +2,16 @@ SET( headers tnlSORSolver_impl.h
    )
    
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/solvers/linear/stationary )    
+set( common_SOURCES ${CURRENT_DIR}/tnlSORSolver_impl.cpp )
+
 set( tnl_implementation_solvers_linear_stationary_SOURCES
-     ${CURRENT_DIR}/tnlSORSolver_impl.cpp
+     ${common_SOURCES}
      PARENT_SCOPE )
+     
+if( BUILD_CUDA)   # only when BUILD_CUDA is set: export the same common_SOURCES under the CUDA list name
+   set( tnl_implementation_solvers_linear_stationary_CUDA__SOURCES
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()     
    
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/implementation/solvers/linear/stationary )
diff --git a/src/matrix/CMakeLists.txt b/src/matrix/CMakeLists.txt
index 53140264d5faf44983fa155da411f234bdd8c953..f690d8323966e4965759c1002ea9c3e45a033811 100755
--- a/src/matrix/CMakeLists.txt
+++ b/src/matrix/CMakeLists.txt
@@ -13,11 +13,16 @@ SET( headers tnlAdaptiveRgCSRMatrix.h
 	)
 	
 SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/matrix )
+set( common_SOURCES ${CURRENT_DIR}/tnlMatrix.cpp )
+
 SET( tnl_matrix_SOURCES 
-     ${CURRENT_DIR}/tnlMatrix.cpp 
+     ${common_SOURCES}
      PARENT_SCOPE )
 
-#SET( libtnlmatrixincludedir  ${TNL_INCLUDE_DIR}/matrix )
-#SET( libtnlmatrixinclude_HEADERS ${headers} )
+if( BUILD_CUDA )   # only when BUILD_CUDA is set: export the same common_SOURCES under the CUDA list name
+   SET( tnl_matrix_CUDA__SOURCES 
+        ${common_SOURCES}
+        PARENT_SCOPE )
+endif()
 
 INSTALL( FILES ${headers} DESTINATION include/tnl-${tnlVersion}/matrix )
\ No newline at end of file