diff --git a/src/core/arrays/tnlSharedArray.h b/src/core/arrays/tnlSharedArray.h
index 0787b1c30594a5ebb4e711412a406b87d6fe685c..e62b40de771478c913fac0e7674da0c3a9055403 100644
--- a/src/core/arrays/tnlSharedArray.h
+++ b/src/core/arrays/tnlSharedArray.h
@@ -44,13 +44,17 @@ class tnlSharedArray : public tnlObject
    typedef tnlSharedArray< Element, tnlHost, Index > HostType;
    typedef tnlSharedArray< Element, tnlCuda, Index > CudaType;
 
+   __cuda_callable__
    tnlSharedArray();
 
+   __cuda_callable__
    tnlSharedArray( Element* _data,
                    const Index _size );
 
+   __cuda_callable__
    tnlSharedArray( tnlArray< Element, Device, Index >& array );
 
+   __cuda_callable__
    tnlSharedArray( tnlSharedArray< Element, Device, Index >& array );
 
    static tnlString getType();
@@ -61,17 +65,21 @@ class tnlSharedArray : public tnlObject
 
    virtual tnlString getSerializationTypeVirtual() const;
 
+   __cuda_callable__
    void bind( Element* _data,
               const Index _size );
 
    template< typename Array >
+   __cuda_callable__
    void bind( Array& array,
               IndexType index = 0,
               IndexType size = 0 );
 
    template< int Size >
+   __cuda_callable__
    void bind( tnlStaticArray< Size, Element >& array );
 
+   __cuda_callable__
    void bind( tnlSharedArray< Element, Device, Index >& array );
 
    void swap( tnlSharedArray< Element, Device, Index >& array );
diff --git a/src/core/arrays/tnlSharedArray_impl.h b/src/core/arrays/tnlSharedArray_impl.h
index 8ec3257f4963dabaa28e97e1e685e007b7e57f2b..9b7b2a5b0baa13c60c53f91e9113d42428076728 100644
--- a/src/core/arrays/tnlSharedArray_impl.h
+++ b/src/core/arrays/tnlSharedArray_impl.h
@@ -31,6 +31,7 @@ using namespace std;
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedArray< Element, Device, Index >::tnlSharedArray()
 : size( 0 ), data( 0 )
 {
@@ -39,6 +40,7 @@ tnlSharedArray< Element, Device, Index >::tnlSharedArray()
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedArray< Element, Device, Index >::tnlSharedArray( Element* _data,
                                                           const Index _size )
 {
@@ -48,6 +50,7 @@ tnlSharedArray< Element, Device, Index >::tnlSharedArray( Element* _data,
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedArray< Element, Device, Index >::tnlSharedArray( tnlArray< Element, Device, Index >& array )
 {
    this->bind( array );
@@ -56,6 +59,7 @@ tnlSharedArray< Element, Device, Index >::tnlSharedArray( tnlArray< Element, Dev
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedArray< Element, Device, Index >::tnlSharedArray( tnlSharedArray< Element, Device, Index >& array )
 {
    this->bind( array );
@@ -99,6 +103,7 @@ tnlString tnlSharedArray< Element, Device, Index > :: getSerializationTypeVirtua
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 void tnlSharedArray< Element, Device, Index > :: bind( Element* data,
                                                        const Index size )
 {
@@ -116,6 +121,7 @@ template< typename Element,
           typename Device,
           typename Index >
    template< typename Array >
+__cuda_callable__
 void tnlSharedArray< Element, Device, Index > :: bind( Array& array,
                                                        IndexType index,
                                                        IndexType size )
@@ -134,6 +140,7 @@ template< typename Element,
           typename Device,
           typename Index >
    template< int Size >
+__cuda_callable__
 void tnlSharedArray< Element, Device, Index >::bind( tnlStaticArray< Size, Element >& array )
 {
    this->size = Size;
@@ -143,6 +150,7 @@ void tnlSharedArray< Element, Device, Index >::bind( tnlStaticArray< Size, Eleme
 template< typename Element,
           typename Device,
           typename Index >
+__cuda_callable__
 void tnlSharedArray< Element, Device, Index > :: bind( tnlSharedArray< Element, Device, Index >& array )
 {
    this -> size = array. getSize();
diff --git a/src/core/tnlFile.h b/src/core/tnlFile.h
index 1cee3a8b4509a620cc47c18e3de941e7423303a7..0579ecc3e36346dfcf507b77e92122e2c8d48610 100644
--- a/src/core/tnlFile.h
+++ b/src/core/tnlFile.h
@@ -29,7 +29,6 @@
 #include <core/mfuncs.h>
 #include <core/tnlAssert.h>
 #include <core/tnlString.h>
-#include <core/tnlObject.h>
 #include <core/tnlHost.h>
 #include <core/tnlCuda.h>
 
diff --git a/src/core/tnlObject.h b/src/core/tnlObject.h
index be9f0f26d7cc4b7969978aab0d407dd8fda5db72..e7ca120a004b21ef442c0ca4fdf9eb3fe31ba3b6 100644
--- a/src/core/tnlObject.h
+++ b/src/core/tnlObject.h
@@ -18,6 +18,7 @@
 #ifndef tnlObjectH
 #define tnlObjectH
 
+#include <core/tnlCuda.h>
 #include <core/tnlString.h>
 
 class tnlFile;
@@ -39,11 +40,9 @@ class tnlObject
    public:
 
    //! Basic constructor
+   __cuda_callable__
    tnlObject();
 
-   //! Constructor with name
-   tnlObject( const tnlString& name );
-
    /****
     * Type getter. This returns the type in C++ style - for example the returned value
     * may look as follows: "tnlVector< double, tnlCuda >".
@@ -72,6 +71,9 @@ class tnlObject
    bool load( const tnlString& fileName );
 
    //! Destructor
+   // FIXME: the destructor is deliberately left without __cuda_callable__ (see the disabled line below).
+   // Enabling it would require marking every overriding destructor too, even in classes whose constructor is not __cuda_callable__.
+//   __cuda_callable__
    virtual ~tnlObject(){};
 
 };
@@ -83,4 +85,6 @@ bool getObjectType( const tnlString& file_name, tnlString& type );
 bool parseObjectType( const tnlString& objectType,
                       tnlList< tnlString >& parsedObjectType );
 
+#include <core/tnlObject_impl.h>
+
 #endif
diff --git a/src/core/tnlObject.cpp b/src/core/tnlObject_impl.h
similarity index 97%
rename from src/core/tnlObject.cpp
rename to src/core/tnlObject_impl.h
index f4b2ed0f13184a20c3fb421b90fb04876f088a18..c5d19d10dc5ed47b5ad1fb45f6e0eb9322f2d5a1 100644
--- a/src/core/tnlObject.cpp
+++ b/src/core/tnlObject_impl.h
@@ -15,6 +15,9 @@
  *                                                                         *
  ***************************************************************************/
 
+#ifndef tnlObject_H_IMPLEMENTATION
+#define tnlObject_H_IMPLEMENTATION
+
 #include <core/tnlObject.h>
 #include <debug/tnlDebug.h>
 #include <core/tnlAssert.h>
@@ -27,15 +30,11 @@
 
 const char magic_number[] = "TNLMN";
 
+__cuda_callable__ inline // inline: this definition now lives in a header, so it must not emit one symbol per translation unit
 tnlObject :: tnlObject()
 {
 }
 
-
-tnlObject :: tnlObject( const tnlString& _name )
-{
-}
-
 tnlString tnlObject :: getType()
 {
    return tnlString( "tnlObject" );
@@ -207,4 +206,4 @@ bool parseObjectType( const tnlString& objectType,
    return true;
 }
 
-
+#endif /* tnlObject_H_IMPLEMENTATION */
diff --git a/src/core/vectors/tnlSharedVector.h b/src/core/vectors/tnlSharedVector.h
index d3e19391f1008a8219572c1c521e91e543a85bf3..ed22e01bec0ee7fcc01da6f4505b5250d162e57e 100644
--- a/src/core/vectors/tnlSharedVector.h
+++ b/src/core/vectors/tnlSharedVector.h
@@ -38,13 +38,17 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index >
    typedef tnlSharedVector< Real, tnlCuda, Index > CudaType;
 
 
+   __cuda_callable__
    tnlSharedVector();
 
+   __cuda_callable__
    tnlSharedVector( Real* data,
                     const Index size );
 
+   __cuda_callable__
    tnlSharedVector( tnlVector< Real, Device, Index >& vector );
 
+   __cuda_callable__
    tnlSharedVector( tnlSharedVector< Real, Device, Index >& vector );
 
    static tnlString getType();
diff --git a/src/core/vectors/tnlSharedVector_impl.h b/src/core/vectors/tnlSharedVector_impl.h
index 83a5c92e396a3a27de2075ae1e96db929f3d42de..f3c9bdd99490840c612b59fff332b31dfd192b41 100644
--- a/src/core/vectors/tnlSharedVector_impl.h
+++ b/src/core/vectors/tnlSharedVector_impl.h
@@ -23,6 +23,7 @@
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedVector< Real, Device, Index >::tnlSharedVector()
 {
 }
@@ -30,6 +31,7 @@ tnlSharedVector< Real, Device, Index >::tnlSharedVector()
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedVector< Real, Device, Index >::tnlSharedVector( Real* data,
                                                          const Index size )
 : tnlSharedArray< Real, Device, Index >( data, size )
@@ -39,6 +41,7 @@ tnlSharedVector< Real, Device, Index >::tnlSharedVector( Real* data,
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedVector< Real, Device, Index >::tnlSharedVector( tnlVector< Real, Device, Index >& vector )
 : tnlSharedArray< Real, Device, Index >( vector )
 {
@@ -47,6 +50,7 @@ tnlSharedVector< Real, Device, Index >::tnlSharedVector( tnlVector< Real, Device
 template< typename Real,
           typename Device,
           typename Index >
+__cuda_callable__
 tnlSharedVector< Real, Device, Index >::tnlSharedVector( tnlSharedVector< Real, Device, Index >& vector )
 : tnlSharedArray< Real, Device, Index >( vector )
 {