diff --git a/src/TNL/File.h b/src/TNL/File.h index 4a8863b9b64b08c130b30fc18471dfab229f30e3..9295c2368108603e5b4b1bc5617ad6a8937ae0a6 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -20,13 +20,6 @@ namespace TNL { -/** - * When we transfer data between the GPU and the CPU we use 5 MB buffer. This - * size should ensure good performance -- see. - * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer - */ -static constexpr std::streamsize FileGPUvsCPUTransferBufferSize = 5 * 2<<20; - /** * \brief This class serves for binary IO. It allows to do IO even for data allocated on GPU * @@ -57,24 +50,27 @@ class File File() = default; /** - * \brief Open given file and returns \e true after the file is - * successfully opened. Otherwise returns \e false. + * \brief Open given file. * - * Opens file with given \e fileName and returns true/false based on the success in opening the file. - * \param fileName String which indicates name of the file user wants to open. - * \param mode Indicates what user needs to do with opened file. + * Opens file with given \e fileName in some \e mode from \ref File::Mode. + * + * Throws \ref std::ios_base::failure on failure. + * + * \param fileName String which indicates file name. + * \param mode Indicates in what mode the file will be opened - see \ref File::Mode. */ void open( const String& fileName, Mode mode = static_cast< Mode >( static_cast< int >( Mode::In ) | static_cast< int >( Mode::Out ) ) ); /** - * \brief Attempts to close given file and returns \e true when the file is - * successfully closed. Otherwise returns \e false. + * \brief Closes the file. + * + * Throws \ref std::ios_base::failure on failure. */ void close(); /** - * \brief Returns name of given file. + * \brief Returns name of the file. */ const String& getFileName() const { @@ -82,19 +78,20 @@ } /** - * \brief Method that can write particular data type from given file into GPU. (Function that gets particular elements from given file.) 
+ * \brief Method for reading data with given \e Type from the file. * - * Returns \e true when the elements are successfully read from given file. Otherwise returns \e false. + * The data will be stored in \e buffer allocated on device given by the + * \e Device parameter. * * Throws \ref std::ios_base::failure on failure. * * \tparam Type Type of data. - * \tparam Device Place where data are stored after reading from file. For example \ref Devices::Host or \ref Devices::Cuda. - * \tparam Index Type of index by which the elements are indexed. + * \tparam Device Device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda. + * \tparam SourceType Type of the data stored in the file. The data are converted from \e SourceType to \e Type during reading. * \param buffer Pointer in memory where the elements are loaded and stored after reading. * \param elements Number of elements the user wants to get (read) from given file. */ - template< typename Type, typename Device = Devices::Host > + template< typename Type, typename Device = Devices::Host, typename SourceType = Type > bool read( Type* buffer, std::streamsize elements = 1 ); /** @@ -110,23 +107,26 @@ * \param buffer Pointer in memory where the elements are loaded from before writing into file. * \param elements Number of elements the user wants to write into the given file. 
*/ - template< typename Type, typename Device = Devices::Host > + template< typename Type, typename Device = Devices::Host, typename TargetType = Type > bool write( const Type* buffer, std::streamsize elements = 1 ); protected: template< typename Type, typename Device, + typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, + typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, + typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > @@ -134,17 +134,20 @@ class File template< typename Type, typename Device, + typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, + typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, + typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > @@ -152,6 +155,14 @@ class File std::fstream file; String fileName; + + /** + * When we transfer data between the GPU and the CPU we use 5 MB buffer. This + * size should ensure good performance -- see. + * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . + * We use the same buffer size even for retyping data during IO operations. 
+ */ + static constexpr std::streamsize TransferBufferSize = 5 * (1 << 20); }; /** diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index 1db792bd5c0934eefc8ef26cdbd64d9e97a29c87..bbff0ab96010144fc253e16a157946438fac09ca 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -80,7 +80,7 @@ inline void File::close() fileName = ""; } -template< typename Type, typename Device > +template< typename Type, typename Device, typename SourceType > bool File::read( Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to read must be non-negative." ); @@ -94,35 +94,79 @@ bool File::read( Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, + typename SourceType, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { - file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements ); - return true; + if( std::is_same< Type, SourceType >::value ) + { + file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements ); + return true; + } + else + { + const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + using BaseType = typename std::remove_cv< SourceType >::type; + std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; + std::streamsize readElements = 0; + while( readElements < elements ) + { + const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size ); + file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); + for( std::streamsize i = 0; i < transfer; i++ ) + buffer[ readElements + i ] = static_cast< Type >( cast_buffer[ i ] ); + readElements += transfer; + } + return true; + } } // Cuda template< typename Type, typename Device, + typename SourceType, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / 
(std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; std::streamsize readElements = 0; - while( readElements < elements ) + if( std::is_same< Type, SourceType >::value ) { - const std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); - file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer ); - cudaMemcpy( (void*) &buffer[ readElements ], - (void*) host_buffer.get(), - transfer * sizeof( Type ), - cudaMemcpyHostToDevice ); - TNL_CHECK_CUDA_DEVICE; - readElements += transfer; + while( readElements < elements ) + { + const std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); + file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer ); + cudaMemcpy( (void*) &buffer[ readElements ], + (void*) host_buffer.get(), + transfer * sizeof( Type ), + cudaMemcpyHostToDevice ); + TNL_CHECK_CUDA_DEVICE; + readElements += transfer; + } + } + else + { + const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + using BaseType = typename std::remove_cv< SourceType >::type; + std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; + + while( readElements < elements ) + { + const std::streamsize transfer = std::min( std::min( elements - readElements, cast_buffer_size ), host_buffer_size ); + file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); + for( std::streamsize i = 0; i < transfer; i++ ) + host_buffer[ i ] = static_cast< Type >( cast_buffer[ i ] ); + cudaMemcpy( (void*) &buffer[ readElements ], + (void*) host_buffer.get(), + transfer * sizeof( Type ), + cudaMemcpyHostToDevice ); + TNL_CHECK_CUDA_DEVICE; + readElements += transfer; + } } return true; 
#else @@ -133,11 +176,12 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) // MIC template< typename Type, typename Device, + typename SourceType, typename, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -167,7 +211,7 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) #endif } -template< class Type, typename Device > +template< class Type, typename Device, typename TargetType > bool File::write( const Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to write must be non-negative." ); @@ -181,6 +225,7 @@ bool File::write( const Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, + typename TargetType, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { @@ -191,11 +236,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // Cuda template< typename Type, typename Device, + typename TargetType, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -220,11 +266,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // MIC 
template< typename Type, typename Device, + typename TargetType, typename, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };