Commit a2fedda0 authored by Jakub Klinkovský
Browse files

Moved stuff from CudaDeviceInfo.{cpp,cu} and SystemInfo.cpp into header files

parent bd7bc17c
Loading
Loading
Loading
Loading
+4 −19
Original line number Diff line number Diff line
@@ -2,26 +2,11 @@ set (headers Cuda.h
             Cuda_impl.h
             CudaCallable.h
             CudaDeviceInfo.h
             CudaDeviceInfo_impl.h
             Host.h
             MIC.h
             SystemInfo.h )
             SystemInfo.h
             SystemInfo_impl.h
)

# Directory that holds the Devices sources, relative to the top-level source tree.
SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL/Devices )
# Sources that are added to both source lists defined below.
set( common_SOURCES
     ${CURRENT_DIR}/SystemInfo.cpp )

# Only when BUILD_CUDA is set: export the CUDA source list (common sources plus
# CudaDeviceInfo.cu) to the parent scope.
IF( BUILD_CUDA )
   set( tnl_devices_CUDA__SOURCES
        ${common_SOURCES} 
        ${CURRENT_DIR}/CudaDeviceInfo.cu
        PARENT_SCOPE )
ENDIF()    

# Always export the host source list (common sources plus CudaDeviceInfo.cpp)
# to the parent scope.
set( tnl_devices_SOURCES     
     ${common_SOURCES}
     ${CURRENT_DIR}/CudaDeviceInfo.cpp
     PARENT_SCOPE )
    
#SET( libtnlcoreincludedir ${TNL_INCLUDE_DIR}/core )
#SET( libtnlcoreinclude_HEADERS ${headers} )
# Install every file listed in ${headers} into the Devices include directory.
INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Devices )
+0 −126
Original line number Diff line number Diff line
/***************************************************************************
                          Devices::CudaDeviceInfo.cpp  -  description
                             -------------------
    begin                : Jun 21, 2015
    copyright            : (C) 2007 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#ifndef HAVE_CUDA

#include <TNL/Devices/CudaDeviceInfo.h>
#include <TNL/Logger.h>

namespace TNL {
namespace Devices {

// Placeholder implementations of CudaDeviceInfo. This translation unit is
// guarded by "#ifndef HAVE_CUDA", so these bodies are compiled only when
// HAVE_CUDA is not defined. Every query returns a fixed neutral value and
// never touches the CUDA runtime.

/** Device count placeholder: always -1. */
int CudaDeviceInfo::getNumberOfDevices()
{
   return -1;
}

/** Active device placeholder: always -1. */
int CudaDeviceInfo::getActiveDevice()
{
   return -1;
}

/** Device name placeholder: always an empty string; deviceNum is ignored. */
String CudaDeviceInfo::getDeviceName( int deviceNum )
{
   return String( "" );
}

/** Architecture major version placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getArchitectureMajor( int deviceNum )
{
   return 0;
}

/** Architecture minor version placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getArchitectureMinor( int deviceNum )
{
   return 0;
}

/** Clock rate placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getClockRate( int deviceNum )
{
   return 0;
}

/** Total global memory placeholder: always 0; deviceNum is ignored. */
size_t CudaDeviceInfo::getGlobalMemory( int deviceNum )
{
   return 0;
}

/** Free global memory placeholder: always 0. */
size_t CudaDeviceInfo::getFreeGlobalMemory()
{
   return 0;
}

/** Memory clock rate placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getMemoryClockRate( int deviceNum )
{
   return 0;
}

/** ECC flag placeholder: always false; deviceNum is ignored. */
bool CudaDeviceInfo::getECCEnabled( int deviceNum )
{
   return false;
}

/** Multiprocessor count placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getCudaMultiprocessors( int deviceNum )
{
   return 0;
}

/** Cores-per-multiprocessor placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getCudaCoresPerMultiprocessors( int deviceNum )
{
   return 0;
}

/** Total core count placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getCudaCores( int deviceNum )
{
   return 0;
}

/** Registers-per-multiprocessor placeholder: always 0; deviceNum is ignored. */
int CudaDeviceInfo::getRegistersPerMultiprocessor( int deviceNum )
{
   return 0;
}

/** Writes nothing to the logger: there is no device to describe. */
void CudaDeviceInfo::writeDeviceInfo( Logger& logger )
{
}

} // namespace Devices
} // namespace TNL

#endif

src/TNL/Devices/CudaDeviceInfo.cu

deleted 100644 → 0
+0 −218
Original line number Diff line number Diff line
/***************************************************************************
                          CudaDeviceInfo.cu  -  description
                             -------------------
    begin                : Jun 21, 2015
    copyright            : (C) 2007 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#ifdef HAVE_CUDA

#include <unordered_map>

#include <TNL/Devices/CudaDeviceInfo.h>
#include <TNL/Logger.h>

namespace TNL {
namespace Devices {

/**
 * Queries the CUDA runtime for the number of devices.
 *
 * \return The count written by cudaGetDeviceCount(), or 0 when that call
 *         does not return cudaSuccess.
 */
int
CudaDeviceInfo::
getNumberOfDevices()
{
    int devices = 0;
    // The status must be inspected: on failure the output argument cannot be
    // trusted, so report "no devices" instead of an indeterminate value.
    if( cudaGetDeviceCount( &devices ) != cudaSuccess )
        return 0;
    return devices;
}

/**
 * Queries the CUDA runtime for the device currently selected by the calling
 * host thread.
 *
 * \return The index written by cudaGetDevice(), or -1 when that call does
 *         not return cudaSuccess.
 */
int
CudaDeviceInfo::
getActiveDevice()
{
    int device = -1;
    // Never hand back an indeterminate index if the runtime call fails.
    if( cudaGetDevice( &device ) != cudaSuccess )
        return -1;
    return device;
}

/**
 * Returns the name stored in the cudaDeviceProp structure of a device.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The device name, or an empty string when the properties cannot be
 *         obtained.
 */
String
CudaDeviceInfo::
getDeviceName( int deviceNum )
{
    cudaDeviceProp properties;
    // On failure `properties.name` is not guaranteed to be a terminated
    // string, so it must not be read.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return String( "" );
    return String( properties.name );
}

/**
 * Returns the `major` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The major version number, or 0 when the properties cannot be
 *         obtained.
 */
int
CudaDeviceInfo::
getArchitectureMajor( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    return properties.major;
}

/**
 * Returns the `minor` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The minor version number, or 0 when the properties cannot be
 *         obtained.
 */
int
CudaDeviceInfo::
getArchitectureMinor( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    return properties.minor;
}

/**
 * Returns the `clockRate` field of the device's cudaDeviceProp structure.
 * writeDeviceInfo() divides this value by 1.0e3 to print it in MHz.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The clock rate, or 0 when the properties cannot be obtained.
 */
int
CudaDeviceInfo::
getClockRate( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    return properties.clockRate;
}

/**
 * Returns the `totalGlobalMem` field of the device's cudaDeviceProp
 * structure. writeDeviceInfo() divides this value by 1.0e9 to print it in GB.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The total global memory, or 0 when the properties cannot be
 *         obtained.
 */
size_t
CudaDeviceInfo::
getGlobalMemory( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    return properties.totalGlobalMem;
}

/**
 * Returns the amount of free memory reported by cudaMemGetInfo().
 *
 * \return The `free` value written by cudaMemGetInfo(), or 0 when that call
 *         does not return cudaSuccess.
 */
size_t
CudaDeviceInfo::
getFreeGlobalMemory()
{
    size_t free = 0;
    size_t total = 0;   // required by the API, otherwise unused
    // Make the failure path explicit instead of relying on the outputs being
    // left untouched by a failed call.
    if( cudaMemGetInfo( &free, &total ) != cudaSuccess )
        return 0;
    return free;
}

/**
 * Returns the `memoryClockRate` field of the device's cudaDeviceProp
 * structure. writeDeviceInfo() divides this value by 1.0e3 before printing.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The memory clock rate, or 0 when the properties cannot be obtained.
 */
int
CudaDeviceInfo::
getMemoryClockRate( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    return properties.memoryClockRate;
}

/**
 * Reports the `ECCEnabled` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return true when the field is non-zero; false when it is zero or the
 *         properties cannot be obtained.
 */
bool
CudaDeviceInfo::
getECCEnabled( int deviceNum )
{
    cudaDeviceProp properties;
    // Do not read the structure unless the runtime filled it in.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return false;
    return properties.ECCEnabled != 0;
}

/**
 * Returns the `multiProcessorCount` field of the device's cudaDeviceProp
 * structure, caching the value per device index.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The multiprocessor count, or 0 when the properties cannot be
 *         obtained (nothing is cached in that case).
 */
int
CudaDeviceInfo::
getCudaMultiprocessors( int deviceNum )
{
    // results are cached because they are used for configuration of some kernels
    // NOTE(review): the cache is a plain function-local static map with no
    // locking visible here -- confirm callers do not race on it.
    static std::unordered_map< int, int > results;

    // Single lookup instead of count() followed by operator[].
    const auto cached = results.find( deviceNum );
    if( cached != results.end() )
        return cached->second;

    cudaDeviceProp properties;
    // A failed query must not poison the cache with an indeterminate value.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    results.emplace( deviceNum, properties.multiProcessorCount );
    return properties.multiProcessorCount;
}

/**
 * Maps the compute capability (major.minor) of a device to the number of
 * CUDA cores per multiprocessor.
 *
 * \param deviceNum Index of the device whose architecture is looked up.
 * \return The number of cores per multiprocessor, or -1 when the compute
 *         capability is not present in the table below.
 */
int
CudaDeviceInfo::
getCudaCoresPerMultiprocessors( int deviceNum )
{
    const int major = CudaDeviceInfo::getArchitectureMajor( deviceNum );
    const int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum );
    switch( major )
    {
        case 1:   // Tesla generation, G80, G8x, G9x classes
            return 8;
        case 2:   // Fermi generation
            switch( minor )
            {
                case 0:  // GF100 class
                    return 32;
                case 1:  // GF10x class
                    return 48;
            }
            // Unknown Fermi minor: must not fall through into the Kepler case.
            return -1;
        case 3: // Kepler generation -- GK10x, GK11x classes
            return 192;
        case 5: // Maxwell generation -- GM10x, GM20x classes
            return 128;
        case 6: // Pascal generation
            switch( minor )
            {
                case 0:  // GP100 class
                    return 64;
                case 1:  // GP10x classes
                case 2:
                    return 128;
            }
            return -1;
        case 7: // Volta and Turing generations
            return 64;
        case 8: // Ampere and Ada generations
            switch( minor )
            {
                case 0:  // GA100 class
                    return 64;
                case 6:  // GA10x classes
                case 7:
                case 9:  // AD10x classes
                    return 128;
            }
            return -1;
        case 9: // Hopper generation
            return 128;
        default:
            return -1;
    }
}

/**
 * Computes the total number of CUDA cores of a device as
 * multiprocessors * cores-per-multiprocessor.
 *
 * \param deviceNum Index of the device to query.
 * \return The total core count, or -1 when
 *         getCudaCoresPerMultiprocessors() reports an unknown architecture.
 */
int
CudaDeviceInfo::
getCudaCores( int deviceNum )
{
    const int coresPerMultiprocessor =
        CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
    // -1 is the "unknown architecture" sentinel; multiplying by it would
    // produce a meaningless negative core count.
    if( coresPerMultiprocessor < 0 )
        return -1;
    return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) *
           coresPerMultiprocessor;
}

/**
 * Returns the `regsPerMultiprocessor` field of the device's cudaDeviceProp
 * structure, caching the value per device index.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The number of registers per multiprocessor, or 0 when the
 *         properties cannot be obtained (nothing is cached in that case).
 */
int
CudaDeviceInfo::
getRegistersPerMultiprocessor( int deviceNum )
{
    // results are cached because they are used for configuration of some kernels
    // NOTE(review): the cache is a plain function-local static map with no
    // locking visible here -- confirm callers do not race on it.
    static std::unordered_map< int, int > results;

    // Single lookup instead of count() followed by operator[].
    const auto cached = results.find( deviceNum );
    if( cached != results.end() )
        return cached->second;

    cudaDeviceProp properties;
    // A failed query must not poison the cache with an indeterminate value.
    if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
        return 0;
    results.emplace( deviceNum, properties.regsPerMultiprocessor );
    return properties.regsPerMultiprocessor;
}

/**
 * Writes a summary of the currently active CUDA device to the logger:
 * name, architecture, core count, clock rate, global memory, memory clock
 * rate and ECC state.
 *
 * \param logger Logger receiving the parameters via writeParameter().
 */
void
CudaDeviceInfo::
writeDeviceInfo( Logger& logger )
{
    logger.writeParameter< String >( "CUDA GPU info", String("") );

    // TODO: Printing all devices does not make sense until TNL can actually
    //       use more than one device for computations, so only the active
    //       device is reported for now (instead of looping over
    //       getNumberOfDevices() entries).
    const int activeDevice = getActiveDevice();

    logger.writeParameter< String >( "Name", getDeviceName( activeDevice ), 2 );

    // Architecture is printed as "<major>.<minor>".
    String architecture = convertToString( getArchitectureMajor( activeDevice ) ) + "." +
                          convertToString( getArchitectureMinor( activeDevice ) );
    logger.writeParameter< String >( "Architecture", architecture, 2 );

    logger.writeParameter< int >( "CUDA cores", getCudaCores( activeDevice ), 2 );

    // The raw values are scaled by 1.0e3 (clock rates) and 1.0e9 (memory)
    // before printing.
    const double clockRateScaled = static_cast< double >( getClockRate( activeDevice ) ) / 1.0e3;
    logger.writeParameter< double >( "Clock rate (in MHz)", clockRateScaled, 2 );

    const double globalMemoryScaled = static_cast< double >( getGlobalMemory( activeDevice ) ) / 1.0e9;
    logger.writeParameter< double >( "Global memory (in GB)", globalMemoryScaled, 2 );

    const double memoryClockRateScaled = static_cast< double >( getMemoryClockRate( activeDevice ) ) / 1.0e3;
    logger.writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRateScaled, 2 );

    logger.writeParameter< bool >( "ECC enabled", getECCEnabled( activeDevice ), 2 );
}

} // namespace Devices
} // namespace TNL

#endif
+2 −5
Original line number Diff line number Diff line
@@ -15,9 +15,6 @@
#include <TNL/String.h>

namespace TNL {

class Logger;

namespace Devices {

class CudaDeviceInfo
@@ -51,9 +48,9 @@ class CudaDeviceInfo
      static int getCudaCores( int deviceNum );

      static int getRegistersPerMultiprocessor( int deviceNum );

      static void writeDeviceInfo( Logger& logger );
};

} // namespace Devices
} // namespace TNL

#include <TNL/Devices/CudaDeviceInfo_impl.h>
+243 −0
Original line number Diff line number Diff line
/***************************************************************************
                          CudaDeviceInfo_impl.h  -  description
                             -------------------
    begin                : Jun 21, 2015
    copyright            : (C) 2007 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <unordered_map>

#include <TNL/Devices/CudaDeviceInfo.h>
#include <TNL/Exceptions/CudaSupportMissing.h>

namespace TNL {
namespace Devices {

/**
 * Queries the CUDA runtime for the number of devices.
 *
 * \return The count written by cudaGetDeviceCount(), or 0 when that call
 *         does not return cudaSuccess.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getNumberOfDevices()
{
#ifdef HAVE_CUDA
   int devices = 0;
   // The status must be inspected: on failure the output argument cannot be
   // trusted, so report "no devices" instead of an indeterminate value.
   if( cudaGetDeviceCount( &devices ) != cudaSuccess )
      return 0;
   return devices;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Queries the CUDA runtime for the device currently selected by the calling
 * host thread.
 *
 * \return The index written by cudaGetDevice(), or -1 when that call does
 *         not return cudaSuccess.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getActiveDevice()
{
#ifdef HAVE_CUDA
   int device = -1;
   // Never hand back an indeterminate index if the runtime call fails.
   if( cudaGetDevice( &device ) != cudaSuccess )
      return -1;
   return device;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the name stored in the cudaDeviceProp structure of a device.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The device name, or an empty string when the properties cannot be
 *         obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline String
CudaDeviceInfo::
getDeviceName( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // On failure `properties.name` is not guaranteed to be a terminated
   // string, so it must not be read.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return String( "" );
   return String( properties.name );
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `major` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The major version number, or 0 when the properties cannot be
 *         obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getArchitectureMajor( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   return properties.major;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `minor` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The minor version number, or 0 when the properties cannot be
 *         obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getArchitectureMinor( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   return properties.minor;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `clockRate` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The clock rate as reported by the runtime, or 0 when the
 *         properties cannot be obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getClockRate( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   return properties.clockRate;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `totalGlobalMem` field of the device's cudaDeviceProp
 * structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The total global memory as reported by the runtime, or 0 when the
 *         properties cannot be obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline size_t
CudaDeviceInfo::
getGlobalMemory( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   return properties.totalGlobalMem;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the amount of free memory reported by cudaMemGetInfo().
 *
 * \return The `free` value written by cudaMemGetInfo(), or 0 when that call
 *         does not return cudaSuccess.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline size_t
CudaDeviceInfo::
getFreeGlobalMemory()
{
#ifdef HAVE_CUDA
   size_t free = 0;
   size_t total = 0;   // required by the API, otherwise unused
   // Make the failure path explicit instead of relying on the outputs being
   // left untouched by a failed call.
   if( cudaMemGetInfo( &free, &total ) != cudaSuccess )
      return 0;
   return free;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `memoryClockRate` field of the device's cudaDeviceProp
 * structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The memory clock rate as reported by the runtime, or 0 when the
 *         properties cannot be obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getMemoryClockRate( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   return properties.memoryClockRate;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Reports the `ECCEnabled` field of the device's cudaDeviceProp structure.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return true when the field is non-zero; false when it is zero or the
 *         properties cannot be obtained.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline bool
CudaDeviceInfo::
getECCEnabled( int deviceNum )
{
#ifdef HAVE_CUDA
   cudaDeviceProp properties;
   // Do not read the structure unless the runtime filled it in.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return false;
   return properties.ECCEnabled != 0;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `multiProcessorCount` field of the device's cudaDeviceProp
 * structure, caching the value per device index.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The multiprocessor count, or 0 when the properties cannot be
 *         obtained (nothing is cached in that case).
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getCudaMultiprocessors( int deviceNum )
{
#ifdef HAVE_CUDA
   // results are cached because they are used for configuration of some kernels
   // NOTE(review): the cache is a plain function-local static map with no
   // locking visible here -- confirm callers do not race on it.
   static std::unordered_map< int, int > results;

   // Single lookup instead of count() followed by operator[].
   const auto cached = results.find( deviceNum );
   if( cached != results.end() )
      return cached->second;

   cudaDeviceProp properties;
   // A failed query must not poison the cache with an indeterminate value.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   results.emplace( deviceNum, properties.multiProcessorCount );
   return properties.multiProcessorCount;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Maps the compute capability (major.minor) of a device to the number of
 * CUDA cores per multiprocessor.
 *
 * \param deviceNum Index of the device whose architecture is looked up.
 * \return The number of cores per multiprocessor, or -1 when the compute
 *         capability is not present in the table below.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getCudaCoresPerMultiprocessors( int deviceNum )
{
#ifdef HAVE_CUDA
   const int major = CudaDeviceInfo::getArchitectureMajor( deviceNum );
   const int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum );
   switch( major )
   {
      case 1:   // Tesla generation, G80, G8x, G9x classes
         return 8;
      case 2:   // Fermi generation
         switch( minor )
         {
            case 0:  // GF100 class
               return 32;
            case 1:  // GF10x class
               return 48;
         }
         // Unknown Fermi minor: must not fall through into the Kepler case.
         return -1;
      case 3: // Kepler generation -- GK10x, GK11x classes
         return 192;
      case 5: // Maxwell generation -- GM10x, GM20x classes
         return 128;
      case 6: // Pascal generation
         switch( minor )
         {
            case 0:  // GP100 class
               return 64;
            case 1:  // GP10x classes
            case 2:
               return 128;
         }
         return -1;
      case 7: // Volta and Turing generations
         return 64;
      case 8: // Ampere and Ada generations
         switch( minor )
         {
            case 0:  // GA100 class
               return 64;
            case 6:  // GA10x classes
            case 7:
            case 9:  // AD10x classes
               return 128;
         }
         return -1;
      case 9: // Hopper generation
         return 128;
      default:
         return -1;
   }
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Computes the total number of CUDA cores of a device as
 * multiprocessors * cores-per-multiprocessor.
 *
 * \param deviceNum Index of the device to query.
 * \return The total core count, or -1 when
 *         getCudaCoresPerMultiprocessors() reports an unknown architecture.
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getCudaCores( int deviceNum )
{
#ifdef HAVE_CUDA
   const int coresPerMultiprocessor =
      CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum );
   // -1 is the "unknown architecture" sentinel; multiplying by it would
   // produce a meaningless negative core count.
   if( coresPerMultiprocessor < 0 )
      return -1;
   return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) *
          coresPerMultiprocessor;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/**
 * Returns the `regsPerMultiprocessor` field of the device's cudaDeviceProp
 * structure, caching the value per device index.
 *
 * \param deviceNum Index of the device passed to cudaGetDeviceProperties().
 * \return The number of registers per multiprocessor, or 0 when the
 *         properties cannot be obtained (nothing is cached in that case).
 * \throws Exceptions::CudaSupportMissing when compiled without HAVE_CUDA.
 */
inline int
CudaDeviceInfo::
getRegistersPerMultiprocessor( int deviceNum )
{
#ifdef HAVE_CUDA
   // results are cached because they are used for configuration of some kernels
   // NOTE(review): the cache is a plain function-local static map with no
   // locking visible here -- confirm callers do not race on it.
   static std::unordered_map< int, int > results;

   // Single lookup instead of count() followed by operator[].
   const auto cached = results.find( deviceNum );
   if( cached != results.end() )
      return cached->second;

   cudaDeviceProp properties;
   // A failed query must not poison the cache with an indeterminate value.
   if( cudaGetDeviceProperties( &properties, deviceNum ) != cudaSuccess )
      return 0;
   results.emplace( deviceNum, properties.regsPerMultiprocessor );
   return properties.regsPerMultiprocessor;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

} // namespace Devices
} // namespace TNL
Loading