diff --git a/src/TNL/File.h b/src/TNL/File.h
index 4a8863b9b64b08c130b30fc18471dfab229f30e3..9295c2368108603e5b4b1bc5617ad6a8937ae0a6 100644
--- a/src/TNL/File.h
+++ b/src/TNL/File.h
@@ -20,13 +20,6 @@
 
 namespace TNL {
 
-/**
- * When we transfer data between the GPU and the CPU we use 5 MB buffer. This
- * size should ensure good performance -- see.
- * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer
- */
-static constexpr std::streamsize FileGPUvsCPUTransferBufferSize = 5 * 2<<20;
-
 /**
  * \brief This class serves for binary IO. It allows to do IO even for data allocated on GPU
  *
@@ -57,24 +50,27 @@ class File
       File() = default;
 
       /**
-       * \brief Open given file and returns \e true after the file is
-       * successfully opened. Otherwise returns \e false.
+       * \brief Open given file.
        *
-       * Opens file with given \e fileName and returns true/false based on the success in opening the file.
-       * \param fileName String which indicates name of the file user wants to open.
-       * \param mode Indicates what user needs to do with opened file.
+       * Opens file with given \e fileName in some \e mode from \ref File::Mode.
+       * 
+       * Throws \ref std::ios_base::failure on failure.
+       * 
+       * \param fileName String which indicates file name.
+       * \param mode Indicates the mode in which the file will be opened - see \ref File::Mode.
        */
       void open( const String& fileName,
                  Mode mode = static_cast< Mode >( static_cast< int >( Mode::In ) | static_cast< int >( Mode::Out ) ) );
 
       /**
-       * \brief Attempts to close given file and returns \e true when the file is
-       * successfully closed. Otherwise returns \e false.
+       * \brief Closes the file.
+       * 
+       * Throws \ref std::ios_base::failure on failure.
        */
       void close();
 
       /**
-       * \brief Returns name of given file.
+       * \brief Returns name of the file.
        */
       const String& getFileName() const
       {
@@ -82,19 +78,20 @@ class File
       }
 
       /**
-       * \brief Method that can write particular data type from given file into GPU. (Function that gets particular elements from given file.)
+       * \brief Method for reading data with given \e Type from the file.
        *
-       * Returns \e true when the elements are successfully read from given file. Otherwise returns \e false.
+       * The data will be stored in \e buffer allocated on device given by the
+       * \e Device parameter.
        *
        * Throws \ref std::ios_base::failure on failure.
        *
        * \tparam Type Type of data.
-       * \tparam Device Place where data are stored after reading from file. For example \ref Devices::Host or \ref Devices::Cuda.
-       * \tparam Index Type of index by which the elements are indexed.
+       * \tparam Device Device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda.
+       * \tparam SourceType Type of the data elements as stored in the file; they are cast to \e Type after reading.
        * \param buffer Pointer in memory where the elements are loaded and stored after reading.
        * \param elements Number of elements the user wants to get (read) from given file.
        */
-      template< typename Type, typename Device = Devices::Host >
+      template< typename Type, typename Device = Devices::Host, typename SourceType = Type >
       bool read( Type* buffer, std::streamsize elements = 1 );
 
       /**
@@ -110,23 +107,26 @@ class File
        * \param buffer Pointer in memory where the elements are loaded from before writing into file.
        * \param elements Number of elements the user wants to write into the given file.
        */
-      template< typename Type, typename Device = Devices::Host >
+      template< typename Type, typename Device = Devices::Host, typename TargetType = Type >
       bool write( const Type* buffer, std::streamsize elements = 1 );
 
    protected:
       template< typename Type,
                 typename Device,
+                typename SourceType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
       bool read_impl( Type* buffer, std::streamsize elements );
 
       template< typename Type,
                 typename Device,
+                typename SourceType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
                 typename = void >
       bool read_impl( Type* buffer, std::streamsize elements );
 
       template< typename Type,
                 typename Device,
+                typename SourceType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
                 typename = void,
                 typename = void >
@@ -134,17 +134,20 @@ class File
 
       template< typename Type,
                 typename Device,
+                typename TargetType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type >
       bool write_impl( const Type* buffer, std::streamsize elements );
 
       template< typename Type,
                 typename Device,
+                typename TargetType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type,
                 typename = void >
       bool write_impl( const Type* buffer, std::streamsize elements );
 
       template< typename Type,
                 typename Device,
+                typename TargetType,
                 typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
                 typename = void,
                 typename = void >
@@ -152,6 +155,14 @@ class File
 
       std::fstream file;
       String fileName;
+      
+      /**
+       * When we transfer data between the GPU and the CPU we use 5 MB buffer. This
+       * size should ensure good performance -- see.
+       * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer .
+       * We use the same buffer size even for retyping data during IO operations.
+       */
+      static constexpr std::streamsize TransferBufferSize = 5 * (1 << 20);
 };
 
 /**
diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp
index 1db792bd5c0934eefc8ef26cdbd64d9e97a29c87..bbff0ab96010144fc253e16a157946438fac09ca 100644
--- a/src/TNL/File.hpp
+++ b/src/TNL/File.hpp
@@ -80,7 +80,7 @@ inline void File::close()
    fileName = "";
 }
 
-template< typename Type, typename Device >
+template< typename Type, typename Device, typename SourceType >
 bool File::read( Type* buffer, std::streamsize elements )
 {
    TNL_ASSERT_GE( elements, 0, "Number of elements to read must be non-negative." );
@@ -94,35 +94,78 @@ bool File::read( Type* buffer, std::streamsize elements )
 // Host
 template< typename Type,
           typename Device,
+          typename SourceType,
           typename >
 bool File::read_impl( Type* buffer, std::streamsize elements )
 {
-   file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements );
-   return true;
+   if( std::is_same< Type, SourceType >::value )
+   {
+      file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements );
+   }
+   else
+   {
+      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements );
+      using BaseType = typename std::remove_cv< SourceType >::type;
+      std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
+      std::streamsize readElements = 0;
+      while( readElements < elements )
+      {
+         const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size );
+         file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer );
+         for( std::streamsize i = 0; i < transfer; i++ )
+            buffer[ readElements + i ] = static_cast< Type >( cast_buffer[ i ] );
+         readElements += transfer;
+      }
+   }
+   return true;
 }
 
 // Cuda
 template< typename Type,
           typename Device,
+          typename SourceType,
           typename, typename >
 bool File::read_impl( Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_CUDA
-   const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
 
    std::streamsize readElements = 0;
-   while( readElements < elements )
+   if( std::is_same< Type, SourceType >::value )
    {
-      const std::streamsize transfer = std::min( elements - readElements, host_buffer_size );
-      file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer );
-      cudaMemcpy( (void*) &buffer[ readElements ],
-                  (void*) host_buffer.get(),
-                  transfer * sizeof( Type ),
-                  cudaMemcpyHostToDevice );
-      TNL_CHECK_CUDA_DEVICE;
-      readElements += transfer;
+      while( readElements < elements )
+      {
+         const std::streamsize transfer = std::min( elements - readElements, host_buffer_size );
+         file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer );
+         cudaMemcpy( (void*) &buffer[ readElements ],
+                     (void*) host_buffer.get(),
+                     transfer * sizeof( Type ),
+                     cudaMemcpyHostToDevice );
+         TNL_CHECK_CUDA_DEVICE;
+         readElements += transfer;
+      }
+   }
+   else
+   {
+      const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), host_buffer_size );
+      using BaseType = typename std::remove_cv< SourceType >::type;
+      std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] };
+
+      while( readElements < elements )
+      {
+         const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size );
+         file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer );
+         for( std::streamsize i = 0; i < transfer; i++ )
+            host_buffer[ i ] = static_cast< Type >( cast_buffer[ i ] );
+         cudaMemcpy( (void*) &buffer[ readElements ],
+                     (void*) host_buffer.get(),
+                     transfer * sizeof( Type ),
+                     cudaMemcpyHostToDevice );
+         TNL_CHECK_CUDA_DEVICE;
+         readElements += transfer;
+      }
    }
    return true;
 #else
@@ -133,11 +176,12 @@ bool File::read_impl( Type* buffer, std::streamsize elements )
 // MIC
 template< typename Type,
           typename Device,
+          typename SourceType,
           typename, typename, typename >
 bool File::read_impl( Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_MIC
-   const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
 
@@ -167,7 +211,7 @@ bool File::read_impl( Type* buffer, std::streamsize elements )
 #endif
 }
 
-template< class Type, typename Device >
+template< class Type, typename Device, typename TargetType >
 bool File::write( const Type* buffer, std::streamsize elements )
 {
    TNL_ASSERT_GE( elements, 0, "Number of elements to write must be non-negative." );
@@ -181,6 +225,7 @@ bool File::write( const Type* buffer, std::streamsize elements )
 // Host
 template< typename Type,
           typename Device,
+          typename TargetType,
           typename >
 bool File::write_impl( const Type* buffer, std::streamsize elements )
 {
@@ -191,11 +236,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements )
 // Cuda
 template< typename Type,
           typename Device,
+          typename TargetType,
           typename, typename >
 bool File::write_impl( const Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_CUDA
-   const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
 
@@ -220,11 +266,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements )
 // MIC
 template< typename Type,
           typename Device,
+          typename TargetType,
           typename, typename, typename >
 bool File::write_impl( const Type* buffer, std::streamsize elements )
 {
 #ifdef HAVE_MIC
-   const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements );
+   const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
    using BaseType = typename std::remove_cv< Type >::type;
    std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };