diff --git a/src/TNL/Containers/Algorithms/CMakeLists.txt b/src/TNL/Containers/Algorithms/CMakeLists.txt
index a8c9bf7699f129c21c4930df090af5c7d5910325..c63837351076bdf4996016d685abf4fdb163033b 100644
--- a/src/TNL/Containers/Algorithms/CMakeLists.txt
+++ b/src/TNL/Containers/Algorithms/CMakeLists.txt
@@ -1,5 +1,3 @@
-ADD_SUBDIRECTORY( TemplateExplicitInstantiation )
-
 set( headers ArrayOperations.h
              ArrayOperationsHost_impl.h
              ArrayOperationsCuda_impl.h
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp
deleted file mode 100644
index e3225d4568d1d209abc314816d4c9b88cfb60e62..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsCuda_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {    
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Cuda >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Cuda >::getMemoryElement< char        >( const char* data );
-template int         ArrayOperations< Devices::Cuda >::getMemoryElement< int         >( const int* data );
-template long int    ArrayOperations< Devices::Cuda >::getMemoryElement< long int    >( const long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Cuda >::getMemoryElement< float       >( const float* data );
-#endif
-template double      ArrayOperations< Devices::Cuda >::getMemoryElement< double      >( const double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu
deleted file mode 100644
index 73affc5cc5dbdf488be5f91379bbd5170d534771..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsCuda_impl.cu
+++ /dev/null
@@ -1,248 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsCuda_impl.cu  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Cuda >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Cuda >::getMemoryElement< char        >( const char* data );
-template int         ArrayOperations< Devices::Cuda >::getMemoryElement< int         >( const int* data );
-template long int    ArrayOperations< Devices::Cuda >::getMemoryElement< long int    >( const long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Cuda >::getMemoryElement< float       >( const float* data );
-#endif
-template double      ArrayOperations< Devices::Cuda >::getMemoryElement< double      >( const double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Cuda >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Cuda >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Cuda >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp
deleted file mode 100644
index 2c60d95d832545a92ad9ffb4aba7b5eaf285c4c5..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsHost_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {    
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Host >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Host >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Host >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Host >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Host >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Host >::getMemoryElement< char        >( char* data );
-template int         ArrayOperations< Devices::Host >::getMemoryElement< int         >( int* data );
-template long int    ArrayOperations< Devices::Host >::getMemoryElement< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Host >::getMemoryElement< float       >( float* data );
-#endif
-template double      ArrayOperations< Devices::Host >::getMemoryElement< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Host >::getMemoryElement< long double >( long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu
deleted file mode 100644
index 65fdbae2d7731d83c4d2af177e9a6ce070ec5d60..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/ArrayOperationsHost_impl.cu
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************
-                          ArrayOperationsHost_impl.cu  -  description
-                             -------------------
-    begin                : Jul 16, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/ArrayOperations.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        int >( char*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         int >( int*& data, const int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    int >( long int*& data, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       int >( float*& data, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      int >( double*& data, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, int >( long double*& data, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::allocateMemory< char,        long int >( char*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< int,         long int >( int*& data, const long int size );
-template bool ArrayOperations< Devices::Host >::allocateMemory< long int,    long int >( long int*& data, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::allocateMemory< float,       long int >( float*& data, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::allocateMemory< double,      long int >( double*& data, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::allocateMemory< long double, long int >( long double*& data, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::freeMemory< char        >( char* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< int         >( int* data );
-template bool ArrayOperations< Devices::Host >::freeMemory< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::freeMemory< float       >( float* data );
-#endif
-template bool ArrayOperations< Devices::Host >::freeMemory< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::freeMemory< long double >( long double* data );
-#endif
-
-template void ArrayOperations< Devices::Host >::setMemoryElement< char        >( char* data, const char& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< int         >( int* data, const int& value );
-template void ArrayOperations< Devices::Host >::setMemoryElement< long int    >( long int* data, const long int& value );
-#ifdef INSTANTIATE_FLOAT
-template void ArrayOperations< Devices::Host >::setMemoryElement< float       >( float* data, const float& value );
-#endif
-template void ArrayOperations< Devices::Host >::setMemoryElement< double      >( double* data, const double& value );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template void ArrayOperations< Devices::Host >::setMemoryElement< long double >( long double* data, const long double& value );
-#endif
-
-template char        ArrayOperations< Devices::Host >::getMemoryElement< char        >( char* data );
-template int         ArrayOperations< Devices::Host >::getMemoryElement< int         >( int* data );
-template long int    ArrayOperations< Devices::Host >::getMemoryElement< long int    >( long int* data );
-#ifdef INSTANTIATE_FLOAT
-template float       ArrayOperations< Devices::Host >::getMemoryElement< float       >( float* data );
-#endif
-template double      ArrayOperations< Devices::Host >::getMemoryElement< double      >( double* data );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double ArrayOperations< Devices::Host >::getMemoryElement< long double >( long double* data );
-#endif
-
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
-template bool ArrayOperations< Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
-template bool ArrayOperations< Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
-#endif
-#endif
-
-template bool ArrayOperations< Devices::Host >::setMemory< char,        int >( char* destination, const char& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         int >( int* destination, const int& value, const int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       int >( float* destination, const float& value, const int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      int >( double* destination, const double& value, const int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool ArrayOperations< Devices::Host >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
-template bool ArrayOperations< Devices::Host >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
-#ifdef INSTANTIATE_FLOAT
-template bool ArrayOperations< Devices::Host >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
-#endif
-template bool ArrayOperations< Devices::Host >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool ArrayOperations< Devices::Host >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt
deleted file mode 100644
index 49409b2ab595a7c47e689f87b469785e32c80fed..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/CMakeLists.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-if( ${WITH_TEMPLATES_INSTANTIATION} )
-
-   SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation )
-   set( common_SOURCES
-        ${CURRENT_DIR}/VectorOperationsHost_impl.cpp
-   )
-   IF( BUILD_CUDA )
-      set( tnl_core_cuda_CUDA__SOURCES
-           ${common_SOURCES}
-           ${CURRENT_DIR}/ArrayOperationsHost_impl.cu
-           ${CURRENT_DIR}/ArrayOperationsCuda_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-abs-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-and_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-or_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-l2-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-lp-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-equalities_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-inequalities_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-scalar-product_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-sum_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-min_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-abs-max_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-l2-norm_impl.cu
-           ${CURRENT_DIR}/cuda-reduction-diff-lp-norm_impl.cu
-           ${CURRENT_DIR}/cuda-prefix-sum_impl.cu
-           ${CURRENT_DIR}/VectorOperationsCuda_impl.cu
-           PARENT_SCOPE )
-   ELSE()
-      set( common_SOURCES
-           ${common_SOURCES}
-           ${CURRENT_DIR}/ArrayOperationsHost_impl.cpp
-           ${CURRENT_DIR}/ArrayOperationsCuda_impl.cpp
-      )
-   ENDIF()
-
-   set( tnl_core_cuda_SOURCES
-        ${common_SOURCES}
-        ${CURRENT_DIR}/cuda-reduction_impl.cpp
-        PARENT_SCOPE )
-endif()
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp
deleted file mode 100644
index 7263405cfdea5e465604044fe33060341029908f..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsCuda_impl.cpp  -  description
-                             -------------------
-    begin                : Dec 10, 2015
-    copyright            : (C) 2015 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Containers {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
- 
-#endif
- 
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu
deleted file mode 100644
index 99d715a7516271f2dc882a9def91ccb2f9493b5c..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsCuda_impl.cu
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsCuda_impl.cu  -  description
-                             -------------------
-    begin                : Jul 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Vectors {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL2Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorL1Norm( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
- 
-#endif
- 
-} // namespace Vectors
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp
deleted file mode 100644
index 9803167c6d5cc257fb215840af3f39d1a32e3d23..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/VectorOperationsHost_impl.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-/***************************************************************************
-                          VectorOperationsHost_impl.cpp  -  description
-                             -------------------
-    begin                : Jul 20, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/VectorOperations.h>
-
-namespace TNL {
-namespace Containers {    
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * L1 norm
- */
-template int         VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL1Norm( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * L2 norm
- */
-template int         VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorL2Norm( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-
-/****
- * Lp norm
- */
-template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, int >& v, const int& p );
-template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, long int >& v, const int& p );
-template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, long int >& v, const float& p );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, long int >& v, const long double& p );
-#endif
-#endif
-
-
-
-/****
- * Sum
- */
-template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, long int >& v );
-template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, long int >& v );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-
-#endif
-
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu
deleted file mode 100644
index 466654f2d0dd169119b4da2305999548123fca92..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-prefix-sum_impl.cu
+++ /dev/null
@@ -1,90 +0,0 @@
-/***************************************************************************
-                          cuda-prefix-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 18, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/cuda-prefix-sum.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const int *deviceInput,
-                             int* deviceOutput,
-                             tnlParallelReductionSum< int, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-
-#ifdef INSTANTIATE_FLOAT
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const float *deviceInput,
-                             float* deviceOutput,
-                             tnlParallelReductionSum< float, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const double *deviceInput,
-                             double* deviceOutput,
-                             tnlParallelReductionSum< double, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool cudaPrefixSum( const int size,
-                             const int blockSize,
-                             const long double *deviceInput,
-                             long double* deviceOutput,
-                             tnlParallelReductionSum< long double, int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const int *deviceInput,
-                             int* deviceOutput,
-                             tnlParallelReductionSum< int, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-
-#ifdef INSTANTIATE_FLOAT
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const float *deviceInput,
-                             float* deviceOutput,
-                             tnlParallelReductionSum< float, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const double *deviceInput,
-                             double* deviceOutput,
-                             tnlParallelReductionSum< double, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool cudaPrefixSum( const long int size,
-                             const long int blockSize,
-                             const long double *deviceInput,
-                             long double* deviceOutput,
-                             tnlParallelReductionSum< long double, long int >& operation,
-                             const enumPrefixSumType prefixSumType );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu
deleted file mode 100644
index f4569b196c1b0f56af4d10690d88c4ab320bb780..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-max_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > >
-                                   ( tnlParallelReductionAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > >
-                                   ( tnlParallelReductionAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > >
-                                   ( tnlParallelReductionAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > >
-                                   ( tnlParallelReductionAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > >
-                                   ( tnlParallelReductionAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
-                                   ( tnlParallelReductionAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > >
-                                   ( tnlParallelReductionAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > >
-                                   ( tnlParallelReductionAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > >
-                                   ( tnlParallelReductionAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > >
-                                   ( tnlParallelReductionAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu
deleted file mode 100644
index 6206cba87118ad2b347c516ca5896f1eb7a0dcb4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-min_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > >
-                                   ( tnlParallelReductionAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > >
-                                   ( tnlParallelReductionAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > >
-                                   ( tnlParallelReductionAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > >
-                                   ( tnlParallelReductionAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > >
-                                   ( tnlParallelReductionAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > >
-                                   ( tnlParallelReductionAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > >
-                                   ( tnlParallelReductionAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > >
-                                   ( tnlParallelReductionAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > >
-                                   ( tnlParallelReductionAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > >
-                                   ( tnlParallelReductionAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu
deleted file mode 100644
index 15819cb4b2e111a6304e1e9c3c2a64d6a914c369..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-abs-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-abs-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > >
-                                   ( tnlParallelReductionAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > >
-                                   ( tnlParallelReductionAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > >
-                                   ( tnlParallelReductionAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > >
-                                   ( tnlParallelReductionAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > >
-                                   ( tnlParallelReductionAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, int> :: ResultType& result );
-#endif
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > >
-                                   ( tnlParallelReductionAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, long int > :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > >
-                                   ( tnlParallelReductionAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > >
-                                   ( tnlParallelReductionAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > >
-                                   ( tnlParallelReductionAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > >
-                                   ( tnlParallelReductionAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu
deleted file mode 100644
index edb30509c62de2803b8bba24d24f3b973aed4a33..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-and_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-and_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Logical AND
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > >
-                                   ( tnlParallelReductionLogicalAnd< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > >
-                                   ( tnlParallelReductionLogicalAnd< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > >
-                                   ( tnlParallelReductionLogicalAnd< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > >
-                                   ( tnlParallelReductionLogicalAnd< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > >
-                                   ( tnlParallelReductionLogicalAnd< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > >
-                                   ( tnlParallelReductionLogicalAnd< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > >
-                                   ( tnlParallelReductionLogicalAnd< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > >
-                                   ( tnlParallelReductionLogicalAnd< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > >
-                                   ( tnlParallelReductionLogicalAnd< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > >
-                                   ( tnlParallelReductionLogicalAnd< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu
deleted file mode 100644
index d402b1b490660b58df4c76227867944190779559..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff abs max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > >
-                                   ( tnlParallelReductionDiffAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > >
-                                   ( tnlParallelReductionDiffAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > >
-                                   ( tnlParallelReductionDiffAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > >
-                                   ( tnlParallelReductionDiffAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > >
-                                   ( tnlParallelReductionDiffAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu
deleted file mode 100644
index f954631a6677013319d9e250fe3a6892cf06abcc..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-min_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-   
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-
-/****
- * Diff abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > >
-                                   ( tnlParallelReductionDiffAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > >
-                                   ( tnlParallelReductionDiffAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > >
-                                   ( tnlParallelReductionDiffAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > >
-                                   ( tnlParallelReductionDiffAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, int > >
-                                   ( tnlParallelReductionDiffAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu
deleted file mode 100644
index 3e87fd7c8ec204376bea0db88ffa85282d792390..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-abs-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-abs-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > >
-                                   ( tnlParallelReductionDiffAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > >
-                                   ( tnlParallelReductionDiffAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > >
-                                   ( tnlParallelReductionDiffAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > >
-                                   ( tnlParallelReductionDiffAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > >
-                                   ( tnlParallelReductionDiffAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > >
-                                   ( tnlParallelReductionDiffAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu
deleted file mode 100644
index c0f23b3102e45b51c754b771573129926efde8e8..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-l2-norm_impl.cu
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff L2 Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, int > >
-                                   ( tnlParallelReductionDiffL2Norm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, int > >
-                                   ( tnlParallelReductionDiffL2Norm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, int > >
-                                   ( tnlParallelReductionDiffL2Norm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< char, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< int, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< float, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< double, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffL2Norm< long double, long int > >
-                                   ( tnlParallelReductionDiffL2Norm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffL2Norm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffL2Norm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu
deleted file mode 100644
index a0d4a00262633dafc8b023e927647fd18fb760dd..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-lp-norm_impl.cu
+++ /dev/null
@@ -1,88 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > >
-                                   ( tnlParallelReductionDiffLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > >
-                                   ( tnlParallelReductionDiffLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > >
-                                   ( tnlParallelReductionDiffLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > >
-                                   ( tnlParallelReductionDiffLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu
deleted file mode 100644
index 3eaf7558b545ee30d4bbcf7de1394e2c7e357bb7..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > >
-                                   ( tnlParallelReductionDiffMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > >
-                                   ( tnlParallelReductionDiffMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > >
-                                   ( tnlParallelReductionDiffMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > >
-                                   ( tnlParallelReductionDiffMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > >
-                                   ( tnlParallelReductionDiffMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > >
-                                   ( tnlParallelReductionDiffMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > >
-                                   ( tnlParallelReductionDiffMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > >
-                                   ( tnlParallelReductionDiffMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > >
-                                   ( tnlParallelReductionDiffMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > >
-                                   ( tnlParallelReductionDiffMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu
deleted file mode 100644
index 9e0a1b447f1e54889f72aca3008c41a626b911ca..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-min_impl.cu
+++ /dev/null
@@ -1,104 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-
-/****
- * Diff min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > >
-                                   ( tnlParallelReductionDiffMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > >
-                                   ( tnlParallelReductionDiffMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > >
-                                   ( tnlParallelReductionDiffMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > >
-                                   ( tnlParallelReductionDiffMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > >
-                                   ( tnlParallelReductionDiffMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > >
-                                   ( tnlParallelReductionDiffMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > >
-                                   ( tnlParallelReductionDiffMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > >
-                                   ( tnlParallelReductionDiffMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > >
-                                   ( tnlParallelReductionDiffMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > >
-                                   ( tnlParallelReductionDiffMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu
deleted file mode 100644
index cbf0958556eb1d6c0b50654a81ecdc17f1c47650..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-diff-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-diff-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Diff sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > >
-                                   ( tnlParallelReductionDiffSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > >
-                                   ( tnlParallelReductionDiffSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > >
-                                   ( tnlParallelReductionDiffSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > >
-                                   ( tnlParallelReductionDiffSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > >
-                                   ( tnlParallelReductionDiffSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > >
-                                   ( tnlParallelReductionDiffSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > >
-                                   ( tnlParallelReductionDiffSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > >
-                                   ( tnlParallelReductionDiffSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > >
-                                   ( tnlParallelReductionDiffSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > >
-                                   ( tnlParallelReductionDiffSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu
deleted file mode 100644
index 7b7c322b7e51e54e9ae3c4826391dff661ec3456..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-equalities_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-equalities_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Equalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > >
-                                   ( tnlParallelReductionEqualities< char, int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > >
-                                   ( tnlParallelReductionEqualities< int, int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > >
-                                   ( tnlParallelReductionEqualities< float, int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > >
-                                   ( tnlParallelReductionEqualities< double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > >
-                                   ( tnlParallelReductionEqualities< long double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > >
-                                   ( tnlParallelReductionEqualities< char, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > >
-                                   ( tnlParallelReductionEqualities< int, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > >
-                                   ( tnlParallelReductionEqualities< float, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > >
-                                   ( tnlParallelReductionEqualities< double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > >
-                                   ( tnlParallelReductionEqualities< long double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu
deleted file mode 100644
index 08ca8d8bdc421c345d17671d2ea27829de080b82..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-inequalities_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-inequalities_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Inequalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > >
-                                   ( tnlParallelReductionInequalities< char, int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > >
-                                   ( tnlParallelReductionInequalities< int, int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > >
-                                   ( tnlParallelReductionInequalities< float, int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > >
-                                   ( tnlParallelReductionInequalities< double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > >
-                                   ( tnlParallelReductionInequalities< long double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > >
-                                   ( tnlParallelReductionInequalities< char, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > >
-                                   ( tnlParallelReductionInequalities< int, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > >
-                                   ( tnlParallelReductionInequalities< float, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > >
-                                   ( tnlParallelReductionInequalities< double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > >
-                                   ( tnlParallelReductionInequalities< long double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu
deleted file mode 100644
index 5169e1a2adc8eed4422221887f663dc15f5d612d..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-l2-norm_impl.cu
+++ /dev/null
@@ -1,81 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-l2-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * L2 Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, int > >
-                                   ( tnlParallelReductionL2Norm< float, int >& operation,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, int > >
-                                   ( tnlParallelReductionL2Norm< double, int>& operation,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, int > >
-                                   ( tnlParallelReductionL2Norm< long double, int>& operation,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< int, long int > >
-                                   ( tnlParallelReductionL2Norm< int, long int >& operation,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< int, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< float, long int > >
-                                   ( tnlParallelReductionL2Norm< float, long int >& operation,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< double, long int > >
-                                   ( tnlParallelReductionL2Norm< double, long int>& operation,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionL2Norm< long double, long int > >
-                                   ( tnlParallelReductionL2Norm< long double, long int>& operation,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionL2Norm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionL2Norm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu
deleted file mode 100644
index 3d5366013a114a5e5dfe2ec317bfc0015485019c..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-lp-norm_impl.cu
+++ /dev/null
@@ -1,81 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-lp-norm_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > >
-                                   ( tnlParallelReductionLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > >
-                                   ( tnlParallelReductionLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > >
-                                   ( tnlParallelReductionLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > >
-                                   ( tnlParallelReductionLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< int, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > >
-                                   ( tnlParallelReductionLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > >
-                                   ( tnlParallelReductionLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > >
-                                   ( tnlParallelReductionLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu
deleted file mode 100644
index a2965136d9816f4ad4ba3b5eaf1d29a9c49b7d82..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-max_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-max_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > >
-                                   ( tnlParallelReductionMax< char, int >& operation,
-                                     const typename tnlParallelReductionMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > >
-                                   ( tnlParallelReductionMax< int, int >& operation,
-                                     const typename tnlParallelReductionMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > >
-                                   ( tnlParallelReductionMax< float, int >& operation,
-                                     const typename tnlParallelReductionMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > >
-                                   ( tnlParallelReductionMax< double, int>& operation,
-                                     const typename tnlParallelReductionMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > >
-                                   ( tnlParallelReductionMax< long double, int>& operation,
-                                     const typename tnlParallelReductionMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > >
-                                   ( tnlParallelReductionMax< char, long int >& operation,
-                                     const typename tnlParallelReductionMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, long int > >
-                                   ( tnlParallelReductionMax< int, long int >& operation,
-                                     const typename tnlParallelReductionMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > >
-                                   ( tnlParallelReductionMax< float, long int >& operation,
-                                     const typename tnlParallelReductionMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > >
-                                   ( tnlParallelReductionMax< double, long int>& operation,
-                                     const typename tnlParallelReductionMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > >
-                                   ( tnlParallelReductionMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu
deleted file mode 100644
index 2434189c4374574eff2a0a51ad5d8f50da2b833b..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-min_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-min_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > >
-                                   ( tnlParallelReductionMin< char, int >& operation,
-                                     const typename tnlParallelReductionMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > >
-                                   ( tnlParallelReductionMin< int, int >& operation,
-                                     const typename tnlParallelReductionMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > >
-                                   ( tnlParallelReductionMin< float, int >& operation,
-                                     const typename tnlParallelReductionMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > >
-                                   ( tnlParallelReductionMin< double, int>& operation,
-                                     const typename tnlParallelReductionMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > >
-                                   ( tnlParallelReductionMin< long double, int>& operation,
-                                     const typename tnlParallelReductionMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > >
-                                   ( tnlParallelReductionMin< char, long int >& operation,
-                                     const typename tnlParallelReductionMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > >
-                                   ( tnlParallelReductionMin< int, long int >& operation,
-                                     const typename tnlParallelReductionMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > >
-                                   ( tnlParallelReductionMin< float, long int >& operation,
-                                     const typename tnlParallelReductionMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > >
-                                   ( tnlParallelReductionMin< double, long int>& operation,
-                                     const typename tnlParallelReductionMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > >
-                                   ( tnlParallelReductionMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu
deleted file mode 100644
index 6e2c9849ec896138b4c8cf106a8922cd2437c8a9..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-or_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-or_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Logical OR
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > >
-                                   ( tnlParallelReductionLogicalOr< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > >
-                                   ( tnlParallelReductionLogicalOr< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > >
-                                   ( tnlParallelReductionLogicalOr< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > >
-                                   ( tnlParallelReductionLogicalOr< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > >
-                                   ( tnlParallelReductionLogicalOr< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > >
-                                   ( tnlParallelReductionLogicalOr< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > >
-                                   ( tnlParallelReductionLogicalOr< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > >
-                                   ( tnlParallelReductionLogicalOr< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > >
-                                   ( tnlParallelReductionLogicalOr< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > >
-                                   ( tnlParallelReductionLogicalOr< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu
deleted file mode 100644
index eabb3aff6c912faa88b45727c0082dac0538afea..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-scalar-product_impl.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-scalar-product_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * ScalarProduct
- */
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > >
-                                   ( tnlParallelReductionScalarProduct< char, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > >
-                                   ( tnlParallelReductionScalarProduct< int, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > >
-                                   ( tnlParallelReductionScalarProduct< float, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > >
-                                   ( tnlParallelReductionScalarProduct< double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > >
-                                   ( tnlParallelReductionScalarProduct< long double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > >
-                                   ( tnlParallelReductionScalarProduct< char, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > >
-                                   ( tnlParallelReductionScalarProduct< int, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > >
-                                   ( tnlParallelReductionScalarProduct< float, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > >
-                                   ( tnlParallelReductionScalarProduct< double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > >
-                                   ( tnlParallelReductionScalarProduct< long double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu
deleted file mode 100644
index 79d9263ab4922d91caf08493653a8b1effdc1cf6..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction-sum_impl.cu
+++ /dev/null
@@ -1,103 +0,0 @@
-/***************************************************************************
-                          cuda-reduction-sum_impl.cu  -  description
-                             -------------------
-    begin                : Jan 19, 2014
-    copyright            : (C) 2014 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
- 
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
- 
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > >
-                                   ( tnlParallelReductionSum< char, int >& operation,
-                                     const typename tnlParallelReductionSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > >
-                                   ( tnlParallelReductionSum< int, int >& operation,
-                                     const typename tnlParallelReductionSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > >
-                                   ( tnlParallelReductionSum< float, int >& operation,
-                                     const typename tnlParallelReductionSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > >
-                                   ( tnlParallelReductionSum< double, int>& operation,
-                                     const typename tnlParallelReductionSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > >
-                                   ( tnlParallelReductionSum< long double, int>& operation,
-                                     const typename tnlParallelReductionSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > >
-                                   ( tnlParallelReductionSum< char, long int >& operation,
-                                     const typename tnlParallelReductionSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > >
-                                   ( tnlParallelReductionSum< int, long int >& operation,
-                                     const typename tnlParallelReductionSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > >
-                                   ( tnlParallelReductionSum< float, long int >& operation,
-                                     const typename tnlParallelReductionSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > >
-                                   ( tnlParallelReductionSum< double, long int>& operation,
-                                     const typename tnlParallelReductionSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > >
-                                   ( tnlParallelReductionSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
diff --git a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp b/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp
deleted file mode 100644
index ce76fd397eea1d50f6fb873038f0a1223efd72e4..0000000000000000000000000000000000000000
--- a/src/TNL/Containers/Algorithms/TemplateExplicitInstantiation/cuda-reduction_impl.cpp
+++ /dev/null
@@ -1,1505 +0,0 @@
-/***************************************************************************
-                          cuda-reduction_impl.cpp  -  description
-                             -------------------
-    begin                : Mar 24, 2013
-    copyright            : (C) 2013 by Tomas Oberhuber
-    email                : tomas.oberhuber@fjfi.cvut.cz
- ***************************************************************************/
-
-/* See Copyright Notice in tnl/Copyright */
-
-#include <TNL/Containers/Algorithms/reduction-operations.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {   
-
-#ifdef UNDEF //TEMPLATE_EXPLICIT_INSTANTIATION
-
-/****
- * Sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > >
-                                   ( const tnlParallelReductionSum< char, int >& operation,
-                                     const typename tnlParallelReductionSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > >
-                                   ( const tnlParallelReductionSum< int, int >& operation,
-                                     const typename tnlParallelReductionSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > >
-                                   ( const tnlParallelReductionSum< float, int >& operation,
-                                     const typename tnlParallelReductionSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > >
-                                   ( const tnlParallelReductionSum< double, int>& operation,
-                                     const typename tnlParallelReductionSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > >
-                                   ( const tnlParallelReductionSum< long double, int>& operation,
-                                     const typename tnlParallelReductionSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > >
-                                   ( const tnlParallelReductionSum< char, long int >& operation,
-                                     const typename tnlParallelReductionSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > >
-                                   ( const tnlParallelReductionSum< int, long int >& operation,
-                                     const typename tnlParallelReductionSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > >
-                                   ( const tnlParallelReductionSum< float, long int >& operation,
-                                     const typename tnlParallelReductionSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > >
-                                   ( const tnlParallelReductionSum< double, long int>& operation,
-                                     const typename tnlParallelReductionSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > >
-                                   ( const tnlParallelReductionSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > >
-                                   ( const tnlParallelReductionMin< char, int >& operation,
-                                     const typename tnlParallelReductionMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > >
-                                   ( const tnlParallelReductionMin< int, int >& operation,
-                                     const typename tnlParallelReductionMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > >
-                                   ( const tnlParallelReductionMin< float, int >& operation,
-                                     const typename tnlParallelReductionMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > >
-                                   ( const tnlParallelReductionMin< double, int>& operation,
-                                     const typename tnlParallelReductionMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > >
-                                   ( const tnlParallelReductionMin< long double, int>& operation,
-                                     const typename tnlParallelReductionMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > >
-                                   ( const tnlParallelReductionMin< char, long int >& operation,
-                                     const typename tnlParallelReductionMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > >
-                                   ( const tnlParallelReductionMin< int, long int >& operation,
-                                     const typename tnlParallelReductionMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > >
-                                   ( const tnlParallelReductionMin< float, long int >& operation,
-                                     const typename tnlParallelReductionMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > >
-                                   ( const tnlParallelReductionMin< double, long int>& operation,
-                                     const typename tnlParallelReductionMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > >
-                                   ( const tnlParallelReductionMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Max
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > >
-                                   ( const tnlParallelReductionMax< char, int >& operation,
-                                     const typename tnlParallelReductionMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > >
-                                   ( const tnlParallelReductionMax< int, int >& operation,
-                                     const typename tnlParallelReductionMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > >
-                                   ( const tnlParallelReductionMax< float, int >& operation,
-                                     const typename tnlParallelReductionMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > >
-                                   ( const tnlParallelReductionMax< double, int>& operation,
-                                     const typename tnlParallelReductionMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > >
-                                   ( const tnlParallelReductionMax< long double, int>& operation,
-                                     const typename tnlParallelReductionMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > >
-                                   ( const tnlParallelReductionMax< char, long int >& operation,
-                                     const typename tnlParallelReductionMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< int, long int > >
-                                   ( const tnlParallelReductionMax< int, long int >& operation,
-                                     const typename tnlParallelReductionMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > >
-                                   ( const tnlParallelReductionMax< float, long int >& operation,
-                                     const typename tnlParallelReductionMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > >
-                                   ( const tnlParallelReductionMax< double, long int>& operation,
-                                     const typename tnlParallelReductionMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > >
-                                   ( const tnlParallelReductionMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Abs sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > >
-                                   ( const tnlParallelReductionAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > >
-                                   ( const tnlParallelReductionAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > >
-                                   ( const tnlParallelReductionAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > >
-                                   ( const tnlParallelReductionAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > >
-                                   ( const tnlParallelReductionAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > >
-                                   ( const tnlParallelReductionAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > >
-                                   ( const tnlParallelReductionAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > >
-                                   ( const tnlParallelReductionAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > >
-                                   ( const tnlParallelReductionAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > >
-                                   ( const tnlParallelReductionAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Abs min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > >
-                                   ( const tnlParallelReductionAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > >
-                                   ( const tnlParallelReductionAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > >
-                                   ( const tnlParallelReductionAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > >
-                                   ( const tnlParallelReductionAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > >
-                                   ( const tnlParallelReductionAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > >
-                                   ( const tnlParallelReductionAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > >
-                                   ( const tnlParallelReductionAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > >
-                                   ( const tnlParallelReductionAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > >
-                                   ( const tnlParallelReductionAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > >
-                                   ( const tnlParallelReductionAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-/****
- * Abs max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > >
-                                   ( const tnlParallelReductionAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > >
-                                   ( const tnlParallelReductionAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > >
-                                   ( const tnlParallelReductionAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > >
-                                   ( const tnlParallelReductionAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > >
-                                   ( const tnlParallelReductionAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
-                                   ( const tnlParallelReductionAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > >
-                                   ( const tnlParallelReductionAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > >
-                                   ( const tnlParallelReductionAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > >
-                                   ( const tnlParallelReductionAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > >
-                                   ( const tnlParallelReductionAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Logical AND
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > >
-                                   ( const tnlParallelReductionLogicalAnd< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > >
-                                   ( const tnlParallelReductionLogicalAnd< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > >
-                                   ( const tnlParallelReductionLogicalAnd< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > >
-                                   ( const tnlParallelReductionLogicalAnd< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > >
-                                   ( const tnlParallelReductionLogicalAnd< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > >
-                                   ( const tnlParallelReductionLogicalAnd< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalAnd< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalAnd< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Logical OR
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > >
-                                   ( const tnlParallelReductionLogicalOr< char, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > >
-                                   ( const tnlParallelReductionLogicalOr< int, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > >
-                                   ( const tnlParallelReductionLogicalOr< float, int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > >
-                                   ( const tnlParallelReductionLogicalOr< double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > >
-                                   ( const tnlParallelReductionLogicalOr< long double, int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > >
-                                   ( const tnlParallelReductionLogicalOr< char, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > >
-                                   ( const tnlParallelReductionLogicalOr< int, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > >
-                                   ( const tnlParallelReductionLogicalOr< float, long int >& operation,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > >
-                                   ( const tnlParallelReductionLogicalOr< double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > >
-                                   ( const tnlParallelReductionLogicalOr< long double, long int>& operation,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLogicalOr< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLogicalOr< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > >
-                                   ( const tnlParallelReductionLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > >
-                                   ( const tnlParallelReductionLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > >
-                                   ( const tnlParallelReductionLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< char, long int > >
-                                   ( const tnlParallelReductionLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > >
-                                   ( const tnlParallelReductionLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > >
-                                   ( const tnlParallelReductionLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > >
-                                   ( const tnlParallelReductionLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > >
-                                   ( const tnlParallelReductionLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Equalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > >
-                                   ( const tnlParallelReductionEqualities< char, int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > >
-                                   ( const tnlParallelReductionEqualities< int, int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > >
-                                   ( const tnlParallelReductionEqualities< float, int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > >
-                                   ( const tnlParallelReductionEqualities< double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > >
-                                   ( const tnlParallelReductionEqualities< long double, int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > >
-                                   ( const tnlParallelReductionEqualities< char, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > >
-                                   ( const tnlParallelReductionEqualities< int, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > >
-                                   ( const tnlParallelReductionEqualities< float, long int >& operation,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > >
-                                   ( const tnlParallelReductionEqualities< double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > >
-                                   ( const tnlParallelReductionEqualities< long double, long int>& operation,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionEqualities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionEqualities< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Inequalities
- */
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > >
-                                   ( const tnlParallelReductionInequalities< char, int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > >
-                                   ( const tnlParallelReductionInequalities< int, int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > >
-                                   ( const tnlParallelReductionInequalities< float, int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > >
-                                   ( const tnlParallelReductionInequalities< double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > >
-                                   ( const tnlParallelReductionInequalities< long double, int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > >
-                                   ( const tnlParallelReductionInequalities< char, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > >
-                                   ( const tnlParallelReductionInequalities< int, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > >
-                                   ( const tnlParallelReductionInequalities< float, long int >& operation,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > >
-                                   ( const tnlParallelReductionInequalities< double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > >
-                                   ( const tnlParallelReductionInequalities< long double, long int>& operation,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionInequalities< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionInequalities< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * ScalarProduct
- */
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > >
-                                   ( const tnlParallelReductionScalarProduct< char, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > >
-                                   ( const tnlParallelReductionScalarProduct< int, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > >
-                                   ( const tnlParallelReductionScalarProduct< float, int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > >
-                                   ( const tnlParallelReductionScalarProduct< double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > >
-                                   ( const tnlParallelReductionScalarProduct< long double, int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > >
-                                   ( const tnlParallelReductionScalarProduct< char, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > >
-                                   ( const tnlParallelReductionScalarProduct< int, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > >
-                                   ( const tnlParallelReductionScalarProduct< float, long int >& operation,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > >
-                                   ( const tnlParallelReductionScalarProduct< double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > >
-                                   ( const tnlParallelReductionScalarProduct< long double, long int>& operation,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionScalarProduct< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionScalarProduct< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff sum
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > >
-                                   ( const tnlParallelReductionDiffSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > >
-                                   ( const tnlParallelReductionDiffSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > >
-                                   ( const tnlParallelReductionDiffSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > >
-                                   ( const tnlParallelReductionDiffSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > >
-                                   ( const tnlParallelReductionDiffSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > >
-                                   ( const tnlParallelReductionDiffSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > >
-                                   ( const tnlParallelReductionDiffSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > >
-                                   ( const tnlParallelReductionDiffSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > >
-                                   ( const tnlParallelReductionDiffSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > >
-                                   ( const tnlParallelReductionDiffSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff min
- */
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > >
-                                   ( const tnlParallelReductionDiffMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > >
-                                   ( const tnlParallelReductionDiffMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > >
-                                   ( const tnlParallelReductionDiffMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > >
-                                   ( const tnlParallelReductionDiffMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > >
-                                   ( const tnlParallelReductionDiffMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > >
-                                   ( const tnlParallelReductionDiffMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > >
-                                   ( const tnlParallelReductionDiffMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > >
-                                   ( const tnlParallelReductionDiffMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > >
-                                   ( const tnlParallelReductionDiffMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > >
-                                   ( const tnlParallelReductionDiffMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > >
-                                   ( const tnlParallelReductionDiffMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > >
-                                   ( const tnlParallelReductionDiffMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > >
-                                   ( const tnlParallelReductionDiffMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > >
-                                   ( const tnlParallelReductionDiffMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > >
-                                   ( const tnlParallelReductionDiffMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > >
-                                   ( const tnlParallelReductionDiffMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > >
-                                   ( const tnlParallelReductionDiffMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > >
-                                   ( const tnlParallelReductionDiffMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > >
-                                   ( const tnlParallelReductionDiffMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > >
-                                   ( const tnlParallelReductionDiffMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs sum
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsSum< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsSum< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsSum< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsSum< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs min
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsMin< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMin< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMin< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMin< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff abs max
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< char, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< int, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< float, int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > >
-                                   ( const tnlParallelReductionDiffAbsMax< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > >
-                                   ( const tnlParallelReductionDiffAbsMax< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffAbsMax< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffAbsMax< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-/****
- * Diff Lp Norm
- */
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< float, int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > >
-                                   ( const tnlParallelReductionDiffLpNorm< long double, int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, int> :: ResultType& result );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< char, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< char, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< int, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< int, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< float, long int >& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< float, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result );
-
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< double, long int> :: ResultType& result );
-
-#ifdef INSTANTIATE_LONG_DOUBLE
-template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > >
-                                   ( const tnlParallelReductionDiffLpNorm< long double, long int>& operation,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput1,
-                                     const typename tnlParallelReductionDiffLpNorm< long double, long int > :: RealType* deviceInput2,
-                                     typename tnlParallelReductionDiffLpNorm< long double, long int> :: ResultType& result );
-#endif
-#endif
-
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
index b33f425789dbb848e3cfa3f304c5bfe3898167e6..b1bc4dec94817845791641d9a59b9d5bb43ded35 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h
@@ -602,267 +602,3 @@ computeExclusivePrefixSum( Vector& v,
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Containers {
-namespace Algorithms {
-
-/****
- * Max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorMax( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorMin( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMax( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorAbsMin( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< int, Devices::Cuda, long int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long int, Devices::Cuda, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< float, Devices::Cuda, long int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< double, Devices::Cuda, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorLpNorm( const Vector< long double, Devices::Cuda, long int >& v, const long double& p );
-#endif
-#endif
-
-/****
- * Sum
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorSum( const Vector< int, Devices::Cuda, long int >& v );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long int, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorSum( const Vector< float, Devices::Cuda, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorSum( const Vector< double, Devices::Cuda, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorSum( const Vector< long double, Devices::Cuda, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, int >& v1, const Vector< int, Devices::Cuda, int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, int >& v1, const Vector< long int, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, int >& v1,  const Vector< float, Devices::Cuda, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, int >& v1, const Vector< double, Devices::Cuda, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, int >& v1, const Vector< long double, Devices::Cuda, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< int, Devices::Cuda, long int >& v1, const Vector< int, Devices::Cuda, long int >& v2 );
-extern template long int    VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Cuda, long int >& v1, const Vector< long int, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< float, Devices::Cuda, long int >& v1, const Vector< float, Devices::Cuda, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< double, Devices::Cuda, long int >& v1, const Vector< double, Devices::Cuda, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Cuda >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Cuda, long int >& v1, const Vector< long double, Devices::Cuda, long int >& v2 );
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-#endif
diff --git a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
index 8adf05337100039f42aec8049d939851fd6a23d3..ef938886ed93821d1cd071402c2ac85b66e22c42 100644
--- a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
+++ b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h
@@ -642,268 +642,3 @@ computeExclusivePrefixSum( Vector& v,
 } // namespace Algorithms
 } // namespace Containers
 } // namespace TNL
-
-#ifdef TEMPLATE_EXPLICIT_INSTANTIATION
-
-#include <TNL/Containers/Vector.h>
-
-namespace TNL {
-namespace Containers {   
-namespace Algorithms {
-
-/****
- * Max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorMax( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorMin( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMax( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Abs min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorAbsMin( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Lp norm
- */
-extern template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, int >& v, const long double& p );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< int, Devices::Host, long int >& v, const int& p );
-extern template long int    VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long int, Devices::Host, long int >& v, const long int& p );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< float, Devices::Host, long int >& v, const float& p );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< double, Devices::Host, long int >& v, const double& p );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorLpNorm( const Vector< long double, Devices::Host, long int >& v, const long double& p );
-#endif
-#endif
-
-/****
- * Sum
- */
-extern template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, int >& v );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorSum( const Vector< int, Devices::Host, long int >& v );
-extern template long int    VectorOperations< Devices::Host >::getVectorSum( const Vector< long int, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorSum( const Vector< float, Devices::Host, long int >& v );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorSum( const Vector< double, Devices::Host, long int >& v );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorSum( const Vector< long double, Devices::Host, long int >& v );
-#endif
-#endif
-
-/****
- * Difference max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs max
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMax( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-/****
- * Difference abs min
- */
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, int >& v1, const Vector< int, Devices::Host, int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, int >& v1, const Vector< long int, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, int >& v1,  const Vector< float, Devices::Host, int >& v2);
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, int >& v1, const Vector< double, Devices::Host, int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, int >& v1, const Vector< long double, Devices::Host, int >& v2 );
-#endif
-
-
-#ifdef INSTANTIATE_LONG_INT
-extern template int         VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< int, Devices::Host, long int >& v1, const Vector< int, Devices::Host, long int >& v2 );
-extern template long int    VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long int, Devices::Host, long int >& v1, const Vector< long int, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_FLOAT
-extern template float       VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< float, Devices::Host, long int >& v1, const Vector< float, Devices::Host, long int >& v2 );
-#endif
-extern template double      VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< double, Devices::Host, long int >& v1, const Vector< double, Devices::Host, long int >& v2 );
-#ifdef INSTANTIATE_LONG_DOUBLE
-extern template long double VectorOperations< Devices::Host >::getVectorDifferenceAbsMin( const Vector< long double, Devices::Host, long int >& v1, const Vector< long double, Devices::Host, long int >& v2 );
-#endif
-#endif
-
-} // namespace Algorithms
-} // namespace Containers
-} // namespace TNL
-
-#endif