Commit cc56bf24 authored by Tomáš Oberhuber
Browse files

Debugging memory operations on the CUDA device.

parent f9b0abaf
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -49,8 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" )
    find_package( CUDA )
    if( CUDA_FOUND )
        set( BUILD_CUDA TRUE)
        #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
        AddCompilerFlag( "-DHAVE_CUDA" )
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA )
        if( CUDA_ARCHITECTURE STREQUAL "1.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10)
        endif()
+175 −49
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@
#define MEMORYFUNCTIONS_H_

#include <core/cuda/device-check.h>
#include <core/cuda/cuda-reduction.h>
#include <core/cuda/reduction-operations.h>
#include <core/mfuncs.h>
#include <tnlConfig.h>

@@ -102,9 +104,8 @@ bool setMemoryCuda( Element* data,
   dim3 blockSize( 0 ), gridSize( 0 );
   blockSize. x = 256;
   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
   gridSize. x = Min( blocksNumber, ( Index ) maxCudaGridSize );
   //cout << "blocksNumber = " << blocksNumber << "Grid size = " << gridSize. x << " elementsPerThread = " << elementsPerThread << endl;
   setVectorValueCudaKernel<<< blockSize, gridSize >>>( data, size, value );
   gridSize. x = Min( blocksNumber, ( Index ) maxCudaGridSize - 1 );
   setVectorValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );

   return checkCudaDevice;
#else
@@ -252,9 +253,14 @@ bool compareMemoryCuda( const Element* deviceData1,
                        const Index size )
{
#ifdef HAVE_CUDA
   return tnlCUDALongVectorComparison( size,
   bool result;
   tnlParallelReductionEqualities< Element, Index > operation;
   reductionOnCudaDevice( operation,
                          size,
                          deviceData1,
                                       deviceData2 );
                          deviceData2,
                          result );
   return result;
#else
   cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl;
   return false;
@@ -263,48 +269,168 @@ bool compareMemoryCuda( const Element* deviceData1,

#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

#ifdef HAVE_CUDA
// Explicit instantiation declarations of copyMemoryCudaToHost, written one
// parameter per line. `extern template` tells the compiler not to instantiate
// the template implicitly in this translation unit; the matching explicit
// instantiation definitions are not visible in this chunk.
// NOTE(review): the same signatures also appear in single-line form elsewhere
// in this chunk -- confirm which set is meant to stay.

// Element types char, int, long int, float and double with an int size.
extern template bool copyMemoryCudaToHost( char* destination,
                                    const char* source,
                                    const int size );

extern template bool copyMemoryCudaToHost( int* destination,
                                    const int* source,
                                    const int size );

extern template bool copyMemoryCudaToHost( long int* destination,
                                    const long int* source,
                                    const int size );

extern template bool copyMemoryCudaToHost( float* destination,
                                    const float* source,
                                    const int size );

extern template bool copyMemoryCudaToHost( double* destination,
                                    const double* source,
                                    const int size );

// First of the variants that take the size as long int (char elements).
extern template bool copyMemoryCudaToHost( char* destination,
                                    const char* source,
                                    const long int size );
// Explicit instantiation declarations of allocateMemoryHost for the element
// types char, int, long int, float, double and long double. `extern template`
// suppresses implicit instantiation in this translation unit; the matching
// explicit instantiation definitions are not visible in this chunk and are
// presumably provided by a separate source file -- TODO confirm.
// The pointer is taken by reference (`*&`), so the callee can assign it.

// Variants taking the size as int.
extern template bool allocateMemoryHost( char*& data, const int size );
extern template bool allocateMemoryHost( int*& data, const int size );
extern template bool allocateMemoryHost( long int*& data, const int size );
extern template bool allocateMemoryHost( float*& data, const int size );
extern template bool allocateMemoryHost( double*& data, const int size );
extern template bool allocateMemoryHost( long double*& data, const int size );

// Variants taking the size as long int.
extern template bool allocateMemoryHost( char*& data, const long int size );
extern template bool allocateMemoryHost( int*& data, const long int size );
extern template bool allocateMemoryHost( long int*& data, const long int size );
extern template bool allocateMemoryHost( float*& data, const long int size );
extern template bool allocateMemoryHost( double*& data, const long int size );
extern template bool allocateMemoryHost( long double*& data, const long int size );

// Explicit instantiation declarations of allocateMemoryCuda for the element
// types char, int, long int, float, double and long double. `extern template`
// suppresses implicit instantiation in this translation unit; the matching
// explicit instantiation definitions are not visible in this chunk.
// The pointer is taken by reference (`*&`), so the callee can assign it.

// Variants taking the size as int.
extern template bool allocateMemoryCuda( char*& data, const int size );
extern template bool allocateMemoryCuda( int*& data, const int size );
extern template bool allocateMemoryCuda( long int*& data, const int size );
extern template bool allocateMemoryCuda( float*& data, const int size );
extern template bool allocateMemoryCuda( double*& data, const int size );
extern template bool allocateMemoryCuda( long double*& data, const int size );

// Variants taking the size as long int.
extern template bool allocateMemoryCuda( char*& data, const long int size );
extern template bool allocateMemoryCuda( int*& data, const long int size );
extern template bool allocateMemoryCuda( long int*& data, const long int size );
extern template bool allocateMemoryCuda( float*& data, const long int size );
extern template bool allocateMemoryCuda( double*& data, const long int size );
extern template bool allocateMemoryCuda( long double*& data, const long int size );

// Explicit instantiation declarations of freeMemoryHost, one per element type
// (char, int, long int, float, double, long double). There is no size
// parameter, so only the element type varies. The matching explicit
// instantiation definitions are not visible in this chunk.
extern template bool freeMemoryHost( char* data );
extern template bool freeMemoryHost( int* data );
extern template bool freeMemoryHost( long int* data );
extern template bool freeMemoryHost( float* data );
extern template bool freeMemoryHost( double* data );
extern template bool freeMemoryHost( long double* data );

// Explicit instantiation declarations of freeMemoryCuda, one per element type
// (char, int, long int, float, double, long double). There is no size
// parameter, so only the element type varies. The matching explicit
// instantiation definitions are not visible in this chunk.
extern template bool freeMemoryCuda( char* data );
extern template bool freeMemoryCuda( int* data );
extern template bool freeMemoryCuda( long int* data );
extern template bool freeMemoryCuda( float* data );
extern template bool freeMemoryCuda( double* data );
extern template bool freeMemoryCuda( long double* data );

// Explicit instantiation declarations of setMemoryHost for the element types
// char, int, long int, float, double and long double. The fill value is
// passed by const reference and has the same type as the elements. The
// matching explicit instantiation definitions are not visible in this chunk.

// Variants taking the size as int.
extern template bool setMemoryHost( char* data, const char& value, const int size );
extern template bool setMemoryHost( int* data, const int& value, const int size );
extern template bool setMemoryHost( long int* data, const long int& value, const int size );
extern template bool setMemoryHost( float* data, const float& value, const int size );
extern template bool setMemoryHost( double* data, const double& value, const int size );
extern template bool setMemoryHost( long double* data, const long double& value, const int size );

// Variants taking the size as long int.
extern template bool setMemoryHost( char* data, const char& value, const long int size );
extern template bool setMemoryHost( int* data, const int& value, const long int size );
extern template bool setMemoryHost( long int* data, const long int& value, const long int size );
extern template bool setMemoryHost( float* data, const float& value, const long int size );
extern template bool setMemoryHost( double* data, const double& value, const long int size );
extern template bool setMemoryHost( long double* data, const long double& value, const long int size );

// Explicit instantiation declarations of setMemoryCuda for the element types
// char, int, long int, float, double and long double. The fill value is
// passed by const reference and has the same type as the elements. The
// matching explicit instantiation definitions are not visible in this chunk.

// Variants taking the size as int.
extern template bool setMemoryCuda( char* data, const char& value, const int size );
extern template bool setMemoryCuda( int* data, const int& value, const int size );
extern template bool setMemoryCuda( long int* data, const long int& value, const int size );
extern template bool setMemoryCuda( float* data, const float& value, const int size );
extern template bool setMemoryCuda( double* data, const double& value, const int size );
extern template bool setMemoryCuda( long double* data, const long double& value, const int size );

// Variants taking the size as long int.
extern template bool setMemoryCuda( char* data, const char& value, const long int size );
extern template bool setMemoryCuda( int* data, const int& value, const long int size );
extern template bool setMemoryCuda( long int* data, const long int& value, const long int size );
extern template bool setMemoryCuda( float* data, const float& value, const long int size );
extern template bool setMemoryCuda( double* data, const double& value, const long int size );
extern template bool setMemoryCuda( long double* data, const long double& value, const long int size );

// Explicit instantiation declarations of copyMemoryHostToHost for the element
// types char, int, long int, float, double and long double. The source is a
// pointer to const and the destination is a mutable pointer of the same
// element type. The matching explicit instantiation definitions are not
// visible in this chunk.

// Variants taking the size as int.
extern template bool copyMemoryHostToHost( char* destination, const char* source, const int size );
extern template bool copyMemoryHostToHost( int* destination, const int* source, const int size );
extern template bool copyMemoryHostToHost( long int* destination, const long int* source, const int size );
extern template bool copyMemoryHostToHost( float* destination, const float* source, const int size );
extern template bool copyMemoryHostToHost( double* destination, const double* source, const int size );
extern template bool copyMemoryHostToHost( long double* destination, const long double* source, const int size );

// Variants taking the size as long int.
extern template bool copyMemoryHostToHost( char* destination, const char* source, const long int size );
extern template bool copyMemoryHostToHost( int* destination, const int* source, const long int size );
extern template bool copyMemoryHostToHost( long int* destination, const long int* source, const long int size );
extern template bool copyMemoryHostToHost( float* destination, const float* source, const long int size );
extern template bool copyMemoryHostToHost( double* destination, const double* source, const long int size );
extern template bool copyMemoryHostToHost( long double* destination, const long double* source, const long int size );

// Explicit instantiation declarations of copyMemoryCudaToHost for the element
// types char, int, long int, float and double. The matching explicit
// instantiation definitions are not visible in this chunk.
// NOTE(review): unlike the allocate/free/set/compare families, no long double
// variant is declared here -- confirm whether that omission is intentional.

// Variants taking the size as int.
extern template bool copyMemoryCudaToHost( char* destination, const char* source, const int size );
extern template bool copyMemoryCudaToHost( int* destination, const int* source, const int size );
extern template bool copyMemoryCudaToHost( long int* destination, const long int* source, const int size );
extern template bool copyMemoryCudaToHost( float* destination, const float* source, const int size );
extern template bool copyMemoryCudaToHost( double* destination, const double* source, const int size );

// Variants taking the size as long int.
extern template bool copyMemoryCudaToHost( char* destination, const char* source, const long int size );
extern template bool copyMemoryCudaToHost( int* destination, const int* source, const long int size );
extern template bool copyMemoryCudaToHost( long int* destination, const long int* source, const long int size );
extern template bool copyMemoryCudaToHost( float* destination, const float* source, const long int size );
extern template bool copyMemoryCudaToHost( double* destination, const double* source, const long int size );

// Explicit instantiation declarations of copyMemoryHostToCuda for the element
// types char, int, long int, float and double. The matching explicit
// instantiation definitions are not visible in this chunk.
// NOTE(review): unlike the allocate/free/set/compare families, no long double
// variant is declared here -- confirm whether that omission is intentional.

// Variants taking the size as int.
extern template bool copyMemoryHostToCuda( char* destination, const char* source, const int size );
extern template bool copyMemoryHostToCuda( int* destination, const int* source, const int size );
extern template bool copyMemoryHostToCuda( long int* destination, const long int* source, const int size );
extern template bool copyMemoryHostToCuda( float* destination, const float* source, const int size );
extern template bool copyMemoryHostToCuda( double* destination, const double* source, const int size );

// Variants taking the size as long int.
extern template bool copyMemoryHostToCuda( char* destination, const char* source, const long int size );
extern template bool copyMemoryHostToCuda( int* destination, const int* source, const long int size );
extern template bool copyMemoryHostToCuda( long int* destination, const long int* source, const long int size );
extern template bool copyMemoryHostToCuda( float* destination, const float* source, const long int size );
extern template bool copyMemoryHostToCuda( double* destination, const double* source, const long int size );

// Explicit instantiation declarations of copyMemoryCudaToCuda for the element
// types char, int, long int, float and double. The matching explicit
// instantiation definitions are not visible in this chunk.
// NOTE(review): unlike the allocate/free/set/compare families, no long double
// variant is declared here -- confirm whether that omission is intentional.

// Variants taking the size as int.
extern template bool copyMemoryCudaToCuda( char* destination, const char* source, const int size );
extern template bool copyMemoryCudaToCuda( int* destination, const int* source, const int size );
extern template bool copyMemoryCudaToCuda( long int* destination, const long int* source, const int size );
extern template bool copyMemoryCudaToCuda( float* destination, const float* source, const int size );
extern template bool copyMemoryCudaToCuda( double* destination, const double* source, const int size );

// Variants taking the size as long int.
extern template bool copyMemoryCudaToCuda( char* destination, const char* source, const long int size );
extern template bool copyMemoryCudaToCuda( int* destination, const int* source, const long int size );
extern template bool copyMemoryCudaToCuda( long int* destination, const long int* source, const long int size );
extern template bool copyMemoryCudaToCuda( float* destination, const float* source, const long int size );
extern template bool copyMemoryCudaToCuda( double* destination, const double* source, const long int size );

// Explicit instantiation declarations of compareMemoryHost for the element
// types char, int, long int, float, double and long double. Both inputs are
// pointers to const data of the same element type. The matching explicit
// instantiation definitions are not visible in this chunk.

// Variants taking the size as int.
extern template bool compareMemoryHost( const char* data1, const char* data2, const int size );
extern template bool compareMemoryHost( const int* data1, const int* data2, const int size );
extern template bool compareMemoryHost( const long int* data1, const long int* data2, const int size );
extern template bool compareMemoryHost( const float* data1, const float* data2, const int size );
extern template bool compareMemoryHost( const double* data1, const double* data2, const int size );
extern template bool compareMemoryHost( const long double* data1, const long double* data2, const int size );

// Variants taking the size as long int.
extern template bool compareMemoryHost( const char* data1, const char* data2, const long int size );
extern template bool compareMemoryHost( const int* data1, const int* data2, const long int size );
extern template bool compareMemoryHost( const long int* data1, const long int* data2, const long int size );
extern template bool compareMemoryHost( const float* data1, const float* data2, const long int size );
extern template bool compareMemoryHost( const double* data1, const double* data2, const long int size );
extern template bool compareMemoryHost( const long double* data1, const long double* data2, const long int size );

// Explicit instantiation declarations of compareMemoryHostCuda for the element
// types char, int, long int, float, double and long double. Both inputs are
// pointers to const data of the same element type. The matching explicit
// instantiation definitions are not visible in this chunk.
// NOTE(review): presumably one pointer refers to host memory and the other to
// CUDA device memory; these declarations do not show which is which --
// verify against the definition.

// Variants taking the size as int.
extern template bool compareMemoryHostCuda( const char* data1, const char* data2, const int size );
extern template bool compareMemoryHostCuda( const int* data1, const int* data2, const int size );
extern template bool compareMemoryHostCuda( const long int* data1, const long int* data2, const int size );
extern template bool compareMemoryHostCuda( const float* data1, const float* data2, const int size );
extern template bool compareMemoryHostCuda( const double* data1, const double* data2, const int size );
extern template bool compareMemoryHostCuda( const long double* data1, const long double* data2, const int size );

// Variants taking the size as long int.
extern template bool compareMemoryHostCuda( const char* data1, const char* data2, const long int size );
extern template bool compareMemoryHostCuda( const int* data1, const int* data2, const long int size );
extern template bool compareMemoryHostCuda( const long int* data1, const long int* data2, const long int size );
extern template bool compareMemoryHostCuda( const float* data1, const float* data2, const long int size );
extern template bool compareMemoryHostCuda( const double* data1, const double* data2, const long int size );
extern template bool compareMemoryHostCuda( const long double* data1, const long double* data2, const long int size );

// Explicit instantiation declarations of compareMemoryCuda for the element
// types char, int, long int, float, double and long double. Both inputs are
// pointers to const data of the same element type. The matching explicit
// instantiation definitions are not visible in this chunk.

// Variants taking the size as int.
extern template bool compareMemoryCuda( const char* data1, const char* data2, const int size );
extern template bool compareMemoryCuda( const int* data1, const int* data2, const int size );
extern template bool compareMemoryCuda( const long int* data1, const long int* data2, const int size );
extern template bool compareMemoryCuda( const float* data1, const float* data2, const int size );
extern template bool compareMemoryCuda( const double* data1, const double* data2, const int size );
extern template bool compareMemoryCuda( const long double* data1, const long double* data2, const int size );

// Variants taking the size as long int.
extern template bool compareMemoryCuda( const char* data1, const char* data2, const long int size );
extern template bool compareMemoryCuda( const int* data1, const int* data2, const long int size );
extern template bool compareMemoryCuda( const long int* data1, const long int* data2, const long int size );
extern template bool compareMemoryCuda( const float* data1, const float* data2, const long int size );
extern template bool compareMemoryCuda( const double* data1, const double* data2, const long int size );
extern template bool compareMemoryCuda( const long double* data1, const long double* data2, const long int size );

// Explicit instantiation declarations of copyMemoryCudaToHost with a long int
// size, written one parameter per line, for the element types int, long int,
// float and double. The matching explicit instantiation definitions are not
// visible in this chunk.
// NOTE(review): the same signatures also appear in single-line form elsewhere
// in this chunk -- confirm which set is meant to stay.
extern template bool copyMemoryCudaToHost( int* destination,
                                    const int* source,
                                    const long int size );

extern template bool copyMemoryCudaToHost( long int* destination,
                                    const long int* source,
                                    const long int size );

extern template bool copyMemoryCudaToHost( float* destination,
                                    const float* source,
                                    const long int size );

extern template bool copyMemoryCudaToHost( double* destination,
                                    const double* source,
                                    const long int size );

#endif
#endif

#endif /* MEMORYFUNCTIONS_H_ */
+165 −0

File changed.

Preview size limit exceeded, changes collapsed.

+161 −39

File changed.

Preview size limit exceeded, changes collapsed.

+3 −0
Original line number Diff line number Diff line
@@ -20,12 +20,15 @@
#ifdef HAVE_CPPUNIT

#include <cppunit/ui/text/TestRunner.h>
#include <cppunit/CompilerOutputter.h>
#include <iostream>
#include "tnlCudaMemoryOperationsTester.h"
 
int main( int argc, char* argv[] )
{
   CppUnit :: TextTestRunner runner;
   runner. addTest( tnlCudaMemoryOperationsTester :: suite() );
   //runner.setOutputter(new CppUnit::CompilerOutputter(&runner.result(), std::cout));
   if( ! runner.run() )
      return EXIT_FAILURE;
   return EXIT_SUCCESS;
Loading