Loading src/TNL/File.h +32 −21 Original line number Diff line number Diff line Loading @@ -20,13 +20,6 @@ namespace TNL { /** * When we transfer data between the GPU and the CPU we use 5 MB buffer. This * size should ensure good performance -- see. * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer */ static constexpr std::streamsize FileGPUvsCPUTransferBufferSize = 5 * 2<<20; /** * \brief This class serves for binary IO. It allows to do IO even for data allocated on GPU * Loading Loading @@ -57,24 +50,27 @@ class File File() = default; /** * \brief Open given file and returns \e true after the file is * successfully opened. Otherwise returns \e false. * \brief Open given file. * * Opens file with given \e fileName in some \e mode from \ref File::Mode. * * Throws \ref std::ios_base::failure on failure. * * Opens file with given \e fileName and returns true/false based on the success in opening the file. * \param fileName String which indicates name of the file user wants to open. * \param mode Indicates what user needs to do with opened file. * \param fileName String which indicates file name. * \param mode Indicates in what mode the file will be opened - see \ref File::Mode. */ void open( const String& fileName, Mode mode = static_cast< Mode >( static_cast< int >( Mode::In ) | static_cast< int >( Mode::Out ) ) ); /** * \brief Attempts to close given file and returns \e true when the file is * successfully closed. Otherwise returns \e false. * \brief Closes the file. * * Throws \ref std::ios_base::failure on failure. */ void close(); /** * \brief Returns name of given file. * \brief Returns name of the file. */ const String& getFileName() const { Loading @@ -82,19 +78,20 @@ class File } /** * \brief Method that can write particular data type from given file into GPU. (Function that gets particular elements from given file.) * \brief Method for reading data with given \e Type from the file. 
* * Returns \e true when the elements are successfully read from given file. Otherwise returns \e false. * The data will be stored in \e buffer allocated on device given by the * \e Device parameter. * * Throws \ref std::ios_base::failure on failure. * * \tparam Type Type of data. * \tparam Device Place where data are stored after reading from file. For example \ref Devices::Host or \ref Devices::Cuda. * \tparam Index Type of index by which the elements are indexed. * \tparam Device Device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda. * \tparam SourceType Type of the data as stored in the file. When it differs from \e Type, the elements are read as \e SourceType and converted to \e Type. Defaults to \e Type. * \param buffer Pointer in memory where the elements are loaded and stored after reading. * \param elements Number of elements the user wants to get (read) from given file. */ template< typename Type, typename Device = Devices::Host > template< typename Type, typename Device = Devices::Host, typename SourceType = Type > bool read( Type* buffer, std::streamsize elements = 1 ); /** Loading @@ -110,23 +107,26 @@ class File * \param buffer Pointer in memory where the elements are loaded from before writing into file. * \param elements Number of elements the user wants to write into the given file. 
*/ template< typename Type, typename Device = Devices::Host > template< typename Type, typename Device = Devices::Host, typename TargetType = Type > bool write( const Type* buffer, std::streamsize elements = 1 ); protected: template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > Loading @@ -134,17 +134,20 @@ class File template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > Loading @@ -152,6 +155,14 @@ class File std::fstream file; String fileName; /** * When we transfer data between the GPU and the CPU we use a 5 MB buffer. This * size should ensure good performance -- see * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . * We use the same buffer size even for retyping data during IO operations. * NOTE(review): the initializer `5 * 2<<20` parses as `(5 * 2) << 20`, i.e. 10 MiB rather than 5 MB -- confirm which size is intended. 
*/ static constexpr std::streamsize TransferBufferSize = 5 * 2<<20; }; /** Loading src/TNL/File.hpp +64 −17 Original line number Diff line number Diff line Loading @@ -80,7 +80,7 @@ inline void File::close() fileName = ""; } template< typename Type, typename Device > template< typename Type, typename Device, typename SourceType > bool File::read( Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to read must be non-negative." ); Loading @@ -94,25 +94,47 @@ bool File::read( Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, typename SourceType, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { if( std::is_same< Type, SourceType >::value ) { file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements ); return true; } else { const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize readElements = 0; while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size ); file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); for( std::streamsize i = 0; i < transfer; i++ ) buffer[ readElements ++ ] = static_cast< Type >( cast_buffer[ i ] ); readElements += transfer; } } } // Cuda template< typename Type, typename Device, typename SourceType, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< 
BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; std::streamsize readElements = 0; if( std::is_same< Type, SourceType >::value ) { while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); Loading @@ -124,6 +146,27 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) TNL_CHECK_CUDA_DEVICE; readElements += transfer; } } else { const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SorceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size ); file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); for( std::streamsize i = 0; i < transfer; i++ ) host_buffer[ i ] = static_cast< Type >( cast_buffer[ i ] ); cudaMemcpy( (void*) &buffer[ readElements ], (void*) host_buffer.get(), transfer * sizeof( Type ), cudaMemcpyHostToDevice ); TNL_CHECK_CUDA_DEVICE; readElements += transfer; } } return true; #else throw Exceptions::CudaSupportMissing(); Loading @@ -133,11 +176,12 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) // MIC template< typename Type, typename Device, typename SourceType, typename, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading Loading @@ -167,7 +211,7 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) #endif } 
template< class Type, typename Device > template< class Type, typename Device, typename TargeType > bool File::write( const Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to write must be non-negative." ); Loading @@ -181,6 +225,7 @@ bool File::write( const Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, typename TargetType, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { Loading @@ -191,11 +236,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // Cuda template< typename Type, typename Device, typename TargetType, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading @@ -220,11 +266,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // MIC template< typename Type, typename Device, typename TargetType, typename, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading Loading
src/TNL/File.h +32 −21 Original line number Diff line number Diff line Loading @@ -20,13 +20,6 @@ namespace TNL { /** * When we transfer data between the GPU and the CPU we use 5 MB buffer. This * size should ensure good performance -- see. * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer */ static constexpr std::streamsize FileGPUvsCPUTransferBufferSize = 5 * 2<<20; /** * \brief This class serves for binary IO. It allows to do IO even for data allocated on GPU * Loading Loading @@ -57,24 +50,27 @@ class File File() = default; /** * \brief Open given file and returns \e true after the file is * successfully opened. Otherwise returns \e false. * \brief Open given file. * * Opens file with given \e fileName in some \e mode from \ref File::Mode. * * Throws \ref std::ios_base::failure on failure. * * Opens file with given \e fileName and returns true/false based on the success in opening the file. * \param fileName String which indicates name of the file user wants to open. * \param mode Indicates what user needs to do with opened file. * \param fileName String which indicates file name. * \param mode Indicates in what mode the file will be opened - see \ref File::Mode. */ void open( const String& fileName, Mode mode = static_cast< Mode >( static_cast< int >( Mode::In ) | static_cast< int >( Mode::Out ) ) ); /** * \brief Attempts to close given file and returns \e true when the file is * successfully closed. Otherwise returns \e false. * \brief Closes the file. * * Throws \ref std::ios_base::failure on failure. */ void close(); /** * \brief Returns name of given file. * \brief Returns name of the file. */ const String& getFileName() const { Loading @@ -82,19 +78,20 @@ class File } /** * \brief Method that can write particular data type from given file into GPU. (Function that gets particular elements from given file.) * \brief Method for reading data with given \e Type from the file. 
* * Returns \e true when the elements are successfully read from given file. Otherwise returns \e false. * The data will be stored in \e buffer allocated on device given by the * \e Device parameter. * * Throws \ref std::ios_base::failure on failure. * * \tparam Type Type of data. * \tparam Device Place where data are stored after reading from file. For example \ref Devices::Host or \ref Devices::Cuda. * \tparam Index Type of index by which the elements are indexed. * \tparam Device Device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda. * \tparam SourceType Type of the data as stored in the file. When it differs from \e Type, the elements are read as \e SourceType and converted to \e Type. Defaults to \e Type. * \param buffer Pointer in memory where the elements are loaded and stored after reading. * \param elements Number of elements the user wants to get (read) from given file. */ template< typename Type, typename Device = Devices::Host > template< typename Type, typename Device = Devices::Host, typename SourceType = Type > bool read( Type* buffer, std::streamsize elements = 1 ); /** Loading @@ -110,23 +107,26 @@ class File * \param buffer Pointer in memory where the elements are loaded from before writing into file. * \param elements Number of elements the user wants to write into the given file. 
*/ template< typename Type, typename Device = Devices::Host > template< typename Type, typename Device = Devices::Host, typename TargetType = Type > bool write( const Type* buffer, std::streamsize elements = 1 ); protected: template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool read_impl( Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename SourceType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > Loading @@ -134,17 +134,20 @@ class File template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, typename = void > bool write_impl( const Type* buffer, std::streamsize elements ); template< typename Type, typename Device, typename TargetType, typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, typename = void, typename = void > Loading @@ -152,6 +155,14 @@ class File std::fstream file; String fileName; /** * When we transfer data between the GPU and the CPU we use a 5 MB buffer. This * size should ensure good performance -- see * http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . * We use the same buffer size even for retyping data during IO operations. * NOTE(review): the initializer `5 * 2<<20` parses as `(5 * 2) << 20`, i.e. 10 MiB rather than 5 MB -- confirm which size is intended. */ static constexpr std::streamsize TransferBufferSize = 5 * 2<<20; }; /** Loading
src/TNL/File.hpp +64 −17 Original line number Diff line number Diff line Loading @@ -80,7 +80,7 @@ inline void File::close() fileName = ""; } template< typename Type, typename Device > template< typename Type, typename Device, typename SourceType > bool File::read( Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to read must be non-negative." ); Loading @@ -94,25 +94,47 @@ bool File::read( Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, typename SourceType, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { if( std::is_same< Type, SourceType >::value ) { file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements ); return true; } else { const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize readElements = 0; while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size ); file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); for( std::streamsize i = 0; i < transfer; i++ ) buffer[ readElements ++ ] = static_cast< Type >( cast_buffer[ i ] ); readElements += transfer; } } } // Cuda template< typename Type, typename Device, typename SourceType, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; std::streamsize 
readElements = 0; if( std::is_same< Type, SourceType >::value ) { while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); Loading @@ -124,6 +146,27 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) TNL_CHECK_CUDA_DEVICE; readElements += transfer; } } else { const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SorceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; while( readElements < elements ) { const std::streamsize transfer = std::min( elements - readElements, cast_buffer_size ); file.read( reinterpret_cast<char*>(cast_buffer.get()), sizeof(SourceType) * transfer ); for( std::streamsize i = 0; i < transfer; i++ ) host_buffer[ i ] = static_cast< Type >( cast_buffer[ i ] ); cudaMemcpy( (void*) &buffer[ readElements ], (void*) host_buffer.get(), transfer * sizeof( Type ), cudaMemcpyHostToDevice ); TNL_CHECK_CUDA_DEVICE; readElements += transfer; } } return true; #else throw Exceptions::CudaSupportMissing(); Loading @@ -133,11 +176,12 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) // MIC template< typename Type, typename Device, typename SourceType, typename, typename, typename > bool File::read_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading Loading @@ -167,7 +211,7 @@ bool File::read_impl( Type* buffer, std::streamsize elements ) #endif } template< class Type, typename Device > template< class Type, typename Device, 
typename TargeType > bool File::write( const Type* buffer, std::streamsize elements ) { TNL_ASSERT_GE( elements, 0, "Number of elements to write must be non-negative." ); Loading @@ -181,6 +225,7 @@ bool File::write( const Type* buffer, std::streamsize elements ) // Host template< typename Type, typename Device, typename TargetType, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { Loading @@ -191,11 +236,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // Cuda template< typename Type, typename Device, typename TargetType, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading @@ -220,11 +266,12 @@ bool File::write_impl( const Type* buffer, std::streamsize elements ) // MIC template< typename Type, typename Device, typename TargetType, typename, typename, typename > bool File::write_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_MIC const std::streamsize host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / (std::streamsize) sizeof(Type), elements ); const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; Loading