Moved stuff from CudaDeviceInfo.{cpp,cu} and SystemInfo.cpp into header files (a2fedda0) · Commits · TNL / tnl-dev

src/TNL/Devices/CMakeLists.txt

+4 −19

Original line number	Diff line number	Diff line
		@@ -2,26 +2,11 @@ set (headers Cuda.h
		Cuda_impl.h
		CudaCallable.h
		CudaDeviceInfo.h
		CudaDeviceInfo_impl.h
		Host.h
		MIC.h
		SystemInfo.h )
		SystemInfo.h
		SystemInfo_impl.h
		)

		SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Devices )
		set( common_SOURCES
		${CURRENT_DIR}/SystemInfo.cpp )

		IF( BUILD_CUDA )
		set( tnl_devices_CUDA__SOURCES
		${common_SOURCES}
		${CURRENT_DIR}/CudaDeviceInfo.cu
		PARENT_SCOPE )
		ENDIF()

		set( tnl_devices_SOURCES
		${common_SOURCES}
		${CURRENT_DIR}/CudaDeviceInfo.cpp
		PARENT_SCOPE )

		#SET( libtnlcoreincludedir ${TNL_INCLUDE_DIR}/core )
		#SET( libtnlcoreinclude_HEADERS ${headers} )
		INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Devices )

src/TNL/Devices/CudaDeviceInfo.cpp

deleted100644 → 0

+0 −126

Original line number	Diff line number	Diff line
		/***************************************************************************
		Devices::CudaDeviceInfo.cpp - description
		-------------------
		begin : Jun 21, 2015
		copyright : (C) 2007 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#ifndef HAVE_CUDA

		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Logger.h>

		namespace TNL {
		namespace Devices {

		// Placeholder definitions of the CudaDeviceInfo queries. This translation
		// unit is guarded by #ifndef HAVE_CUDA, so these are what callers get in a
		// build without CUDA: every query ignores its arguments and returns a fixed
		// value.

		// Device count placeholder: always -1.
		int CudaDeviceInfo::getNumberOfDevices()
		{
		   return -1;
		}

		// Active device placeholder: always -1.
		int CudaDeviceInfo::getActiveDevice()
		{
		   return -1;
		}

		// Device name placeholder: always the empty string.
		String CudaDeviceInfo::getDeviceName( int deviceNum )
		{
		   return String( "" );
		}

		// Major architecture number placeholder: always 0.
		int CudaDeviceInfo::getArchitectureMajor( int deviceNum )
		{
		   return 0;
		}

		// Minor architecture number placeholder: always 0.
		int CudaDeviceInfo::getArchitectureMinor( int deviceNum )
		{
		   return 0;
		}

		// Clock rate placeholder: always 0.
		int CudaDeviceInfo::getClockRate( int deviceNum )
		{
		   return 0;
		}

		// Total global memory placeholder: always 0.
		size_t CudaDeviceInfo::getGlobalMemory( int deviceNum )
		{
		   return 0;
		}

		// Free global memory placeholder: always 0.
		size_t CudaDeviceInfo::getFreeGlobalMemory()
		{
		   return 0;
		}

		// Memory clock rate placeholder: always 0.
		int CudaDeviceInfo::getMemoryClockRate( int deviceNum )
		{
		   return 0;
		}

		// ECC flag placeholder: always false.
		bool CudaDeviceInfo::getECCEnabled( int deviceNum )
		{
		   return false;
		}

		// Multiprocessor count placeholder: always 0.
		int CudaDeviceInfo::getCudaMultiprocessors( int deviceNum )
		{
		   return 0;
		}

		// Cores-per-multiprocessor placeholder: always 0.
		int CudaDeviceInfo::getCudaCoresPerMultiprocessors( int deviceNum )
		{
		   return 0;
		}

		// Total core count placeholder: always 0.
		int CudaDeviceInfo::getCudaCores( int deviceNum )
		{
		   return 0;
		}

		// Registers-per-multiprocessor placeholder: always 0.
		int CudaDeviceInfo::getRegistersPerMultiprocessor( int deviceNum )
		{
		   return 0;
		}

		// Nothing is written to the logger in this build.
		void CudaDeviceInfo::writeDeviceInfo( Logger& logger )
		{
		}

		} // namespace Devices
		} // namespace TNL

		#endif

src/TNL/Devices/CudaDeviceInfo.cu

deleted100644 → 0

+0 −218

Original line number	Diff line number	Diff line
		/***************************************************************************
		CudaDeviceInfo.cu - description
		-------------------
		begin : Jun 21, 2015
		copyright : (C) 2007 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#ifdef HAVE_CUDA

		#include <unordered_map>

		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Logger.h>

		namespace TNL {
		namespace Devices {

		int
		CudaDeviceInfo::
		getNumberOfDevices()
		{
		int devices;
		cudaGetDeviceCount( &devices );
		return devices;
		}

		int
		CudaDeviceInfo::
		getActiveDevice()
		{
		int device;
		cudaGetDevice( &device );
		return device;
		}

		String
		CudaDeviceInfo::
		getDeviceName( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return String( properties.name );
		}

		int
		CudaDeviceInfo::
		getArchitectureMajor( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.major;
		}

		int
		CudaDeviceInfo::
		getArchitectureMinor( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.minor;
		}

		int
		CudaDeviceInfo::
		getClockRate( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.clockRate;
		}

		size_t
		CudaDeviceInfo::
		getGlobalMemory( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.totalGlobalMem;
		}

		size_t
		CudaDeviceInfo::
		getFreeGlobalMemory()
		{
		size_t free = 0;
		size_t total = 0;
		cudaMemGetInfo( &free, &total );
		return free;
		}

		int
		CudaDeviceInfo::
		getMemoryClockRate( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.memoryClockRate;
		}

		bool
		CudaDeviceInfo::
		getECCEnabled( int deviceNum )
		{
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		return properties.ECCEnabled;
		}

		int
		CudaDeviceInfo::
		getCudaMultiprocessors( int deviceNum )
		{
		// results are cached because they are used for configuration of some kernels
		static std::unordered_map< int, int > results;
		if( results.count( deviceNum ) == 0 ) {
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		results.emplace( deviceNum, properties.multiProcessorCount );
		return properties.multiProcessorCount;
		}
		return results[ deviceNum ];
		}

		int
		CudaDeviceInfo::
		getCudaCoresPerMultiprocessors( int deviceNum )
		{
		int major = CudaDeviceInfo::getArchitectureMajor( deviceNum );
		int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum );
		switch( major )
		{
		case 1: // Tesla generation, G80, G8x, G9x classes
		return 8;
		case 2: // Fermi generation
		switch( minor )
		{
		case 0: // GF100 class
		return 32;
		case 1: // GF10x class
		return 48;
		}
		case 3: // Kepler generation -- GK10x, GK11x classes
		return 192;
		case 5: // Maxwell generation -- GM10x, GM20x classes
		return 128;
		case 6: // Pascal generation
		switch( minor )
		{
		case 0: // GP100 class
		return 64;
		case 1: // GP10x classes
		case 2:
		return 128;
		}
		default:
		return -1;
		}
		}

		int
		CudaDeviceInfo::
		getCudaCores( int deviceNum )
		{
		return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) *
		CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
		}

		int
		CudaDeviceInfo::
		getRegistersPerMultiprocessor( int deviceNum )
		{
		// results are cached because they are used for configuration of some kernels
		static std::unordered_map< int, int > results;
		if( results.count( deviceNum ) == 0 ) {
		cudaDeviceProp properties;
		cudaGetDeviceProperties( &properties, deviceNum );
		results.emplace( deviceNum, properties.regsPerMultiprocessor );
		return properties.regsPerMultiprocessor;
		}
		return results[ deviceNum ];
		}

		void
		CudaDeviceInfo::
		writeDeviceInfo( Logger& logger )
		{
		logger.writeParameter< String >( "CUDA GPU info", String("") );
		// TODO: Printing all devices does not make sense until TNL can actually
		// use more than one device for computations. Printing only the active
		// device for now...
		// int devices = getNumberOfDevices();
		// writeParameter< int >( "Number of devices", devices, 1 );
		// for( int i = 0; i < devices; i++ )
		// {
		// logger.writeParameter< int >( "Device no.", i, 1 );
		int i = getActiveDevice();
		logger.writeParameter< String >( "Name", getDeviceName( i ), 2 );
		String deviceArch = convertToString( getArchitectureMajor( i ) ) + "." +
		convertToString( getArchitectureMinor( i ) );
		logger.writeParameter< String >( "Architecture", deviceArch, 2 );
		logger.writeParameter< int >( "CUDA cores", getCudaCores( i ), 2 );
		double clockRate = ( double ) getClockRate( i ) / 1.0e3;
		logger.writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 );
		double globalMemory = ( double ) getGlobalMemory( i ) / 1.0e9;
		logger.writeParameter< double >( "Global memory (in GB)", globalMemory, 2 );
		double memoryClockRate = ( double ) getMemoryClockRate( i ) / 1.0e3;
		logger.writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 );
		logger.writeParameter< bool >( "ECC enabled", getECCEnabled( i ), 2 );
		// }
		}

		} // namespace Devices
		} // namespace TNL

		#endif

src/TNL/Devices/CudaDeviceInfo.h

+2 −5

Original line number	Diff line number	Diff line
		@@ -15,9 +15,6 @@
		#include <TNL/String.h>

		namespace TNL {

		class Logger;

		namespace Devices {

		class CudaDeviceInfo
		@@ -51,9 +48,9 @@ class CudaDeviceInfo
		static int getCudaCores( int deviceNum );

		static int getRegistersPerMultiprocessor( int deviceNum );

		static void writeDeviceInfo( Logger& logger );
		};

		} // namespace Devices
		} // namespace TNL

		#include <TNL/Devices/CudaDeviceInfo_impl.h>

src/TNL/Devices/CudaDeviceInfo_impl.h

0 → 100644

+243 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		CudaDeviceInfo_impl.h - description
		-------------------
		begin : Jun 21, 2015
		copyright : (C) 2007 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		#include <unordered_map>

		#include <TNL/Devices/CudaDeviceInfo.h>
		#include <TNL/Exceptions/CudaSupportMissing.h>

		namespace TNL {
		namespace Devices {

		// Returns the number of CUDA devices reported by cudaGetDeviceCount, or -1
		// when the call fails (previously an uninitialized value was returned).
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getNumberOfDevices()
		{
		#ifdef HAVE_CUDA
		   int devices = 0;
		   if( cudaGetDeviceCount( &devices ) != cudaSuccess )
		      return -1;
		   return devices;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the index of the active device reported by cudaGetDevice, or -1
		// when the call fails (previously an uninitialized value was returned).
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getActiveDevice()
		{
		#ifdef HAVE_CUDA
		   int device = 0;
		   if( cudaGetDevice( &device ) != cudaSuccess )
		      return -1;
		   return device;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the name of device `deviceNum`, or an empty string when the
		// device properties cannot be queried (previously an uninitialized buffer
		// was read in that case).
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline String
		CudaDeviceInfo::
		getDeviceName( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return String( "" );
		   return String( properties.name );
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the major compute-capability number of device `deviceNum`, or 0
		// when the device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getArchitectureMajor( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   return properties.major;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the minor compute-capability number of device `deviceNum`, or 0
		// when the device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getArchitectureMinor( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   return properties.minor;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns cudaDeviceProp::clockRate of device `deviceNum`, or 0 when the
		// device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getClockRate( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   return properties.clockRate;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns cudaDeviceProp::totalGlobalMem of device `deviceNum`, or 0 when
		// the device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline size_t
		CudaDeviceInfo::
		getGlobalMemory( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   return properties.totalGlobalMem;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the free-memory value filled in by cudaMemGetInfo. Both output
		// variables start at zero, so zero is returned if the call leaves them
		// untouched.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline size_t
		CudaDeviceInfo::
		getFreeGlobalMemory()
		{
		#ifdef HAVE_CUDA
		   size_t freeBytes = 0, totalBytes = 0;
		   // the total is required by the API but not used here
		   cudaMemGetInfo( &freeBytes, &totalBytes );
		   return freeBytes;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns cudaDeviceProp::memoryClockRate of device `deviceNum`, or 0 when
		// the device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getMemoryClockRate( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   return properties.memoryClockRate;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns whether cudaDeviceProp::ECCEnabled is set for device
		// `deviceNum`; false when the device properties cannot be queried.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline bool
		CudaDeviceInfo::
		getECCEnabled( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   cudaDeviceProp properties;
		   // never read the struct unless the query succeeded
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return false;
		   return properties.ECCEnabled != 0;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the multiprocessor count of device `deviceNum`. Successful
		// queries are stored in a function-local static map and served from it on
		// later calls. Returns 0 when the device properties cannot be queried; a
		// failed query is not stored, so the next call tries again.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getCudaMultiprocessors( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   // results are cached because they are used for configuration of some kernels
		   static std::unordered_map< int, int > results;
		   // single lookup instead of count() followed by operator[]
		   const auto cached = results.find( deviceNum );
		   if( cached != results.end() )
		      return cached->second;

		   cudaDeviceProp properties;
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   results.emplace( deviceNum, properties.multiProcessorCount );
		   return properties.multiProcessorCount;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the number of CUDA cores per multiprocessor for the compute
		// capability of device `deviceNum`, or -1 when the architecture is not in
		// the table below or the device properties cannot be queried.
		// Every inner switch has an explicit default so that an unknown minor
		// version can no longer fall through into the next generation's case
		// (Fermi with an unknown minor used to report the Kepler value 192).
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getCudaCoresPerMultiprocessors( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   // one property query provides both the major and the minor number
		   cudaDeviceProp properties;
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return -1;

		   switch( properties.major )
		   {
		      case 1:   // Tesla generation, G80, G8x, G9x classes
		         return 8;
		      case 2:   // Fermi generation
		         switch( properties.minor )
		         {
		            case 0:  // GF100 class
		               return 32;
		            case 1:  // GF10x class
		               return 48;
		            default:
		               return -1;
		         }
		      case 3:   // Kepler generation -- GK10x, GK11x classes
		         return 192;
		      case 5:   // Maxwell generation -- GM10x, GM20x classes
		         return 128;
		      case 6:   // Pascal generation
		         switch( properties.minor )
		         {
		            case 0:  // GP100 class
		               return 64;
		            case 1:  // GP10x classes
		            case 2:
		               return 128;
		            default:
		               return -1;
		         }
		      case 7:   // Volta and Turing generations
		         return 64;
		      case 8:   // Ampere and Ada generations
		         switch( properties.minor )
		         {
		            case 0:  // GA100 class
		               return 64;
		            case 6:  // GA10x classes
		            case 7:
		            case 9:  // AD10x classes
		               return 128;
		            default:
		               return -1;
		         }
		      case 9:   // Hopper generation
		         return 128;
		      default:
		         return -1;
		   }
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns the total number of CUDA cores of device `deviceNum`, i.e. the
		// multiprocessor count times the cores per multiprocessor. Returns -1 when
		// getCudaCoresPerMultiprocessors reports a negative (unknown) value, rather
		// than multiplying that sentinel by the multiprocessor count.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getCudaCores( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   const int coresPerMultiprocessor = CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
		   if( coresPerMultiprocessor < 0 )
		      return -1;
		   return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) * coresPerMultiprocessor;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		// Returns cudaDeviceProp::regsPerMultiprocessor of device `deviceNum`.
		// Successful queries are stored in a function-local static map and served
		// from it on later calls. Returns 0 when the device properties cannot be
		// queried; a failed query is not stored, so the next call tries again.
		// Throws Exceptions::CudaSupportMissing when built without HAVE_CUDA.
		inline int
		CudaDeviceInfo::
		getRegistersPerMultiprocessor( int deviceNum )
		{
		#ifdef HAVE_CUDA
		   // results are cached because they are used for configuration of some kernels
		   static std::unordered_map< int, int > results;
		   // single lookup instead of count() followed by operator[]
		   const auto cached = results.find( deviceNum );
		   if( cached != results.end() )
		      return cached->second;

		   cudaDeviceProp properties;
		   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
		      return 0;
		   results.emplace( deviceNum, properties.regsPerMultiprocessor );
		   return properties.regsPerMultiprocessor;
		#else
		   throw Exceptions::CudaSupportMissing();
		#endif
		}

		} // namespace Devices
		} // namespace TNL