Skip to content
Snippets Groups Projects
array-operations.h 4.3 KiB
Newer Older
  • Learn to ignore specific revisions
    /***************************************************************************
                              array-operations.h  -  benchmarks of basic Array operations
                                 -------------------
        begin                : Dec 30, 2015
        copyright            : (C) 2015 by Tomas Oberhuber et al.
        email                : tomas.oberhuber@fjfi.cvut.cz
     ***************************************************************************/
    
    /* See Copyright Notice in tnl/Copyright */
    
    // Implemented by: Jakub Klinkovsky
    
    
    #include <TNL/Containers/Array.h>
    
    namespace TNL
    
    {
    namespace benchmarks
    {
    
    template< typename Real = double,
              typename Index = int >
    bool
    benchmarkArrayOperations( Benchmark & benchmark,
                              const int & loops,
    
                              const long & size )
    
        typedef Containers::Array< Real, Devices::Host, Index > HostArray;
        typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
    
        using namespace std;
    
        double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;
    
        HostArray hostArray, hostArray2;
        CudaArray deviceArray, deviceArray2;
    
        if( ! hostArray.setSize( size ) ||
    
            ! hostArray2.setSize( size )
    #ifdef HAVE_CUDA
            ||
    
            ! deviceArray.setSize( size ) ||
    
            ! deviceArray2.setSize( size )
    #endif
        )
    
    
        {
            const char* msg = "error: allocation of arrays failed";
    
            std::cerr << msg << std::endl;
    
            benchmark.addErrorMessage( msg );
    
    
        Real resultHost, resultDevice;
    
    
        // reset functions
        auto reset1 = [&]() {
            hostArray.setValue( 1.0 );
    
    #ifdef HAVE_CUDA
    
        };
        auto reset2 = [&]() {
            hostArray2.setValue( 1.0 );
    
    #ifdef HAVE_CUDA
    
        };
        auto reset12 = [&]() {
            reset1();
            reset2();
        };
    
    
        reset12();
    
    
        auto compareHost = [&]() {
            resultHost = (int) hostArray == hostArray2;
        };
        auto compareCuda = [&]() {
            resultDevice = (int) deviceArray == deviceArray2;
        };
        benchmark.setOperation( "comparison (operator==)", 2 * datasetSize );
    
        benchmark.time( reset1, "CPU", compareHost );
    #ifdef HAVE_CUDA
        benchmark.time( reset1, "GPU", compareCuda );
    #endif
    
    
    
        auto copyAssignHostHost = [&]() {
            hostArray = hostArray2;
        };
        auto copyAssignCudaCuda = [&]() {
            deviceArray = deviceArray2;
        };
        benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
    
        benchmark.time( reset1, "CPU", copyAssignHostHost );
    #ifdef HAVE_CUDA
        benchmark.time( reset1, "GPU", copyAssignCudaCuda );
    #endif
    
    
    
        auto copyAssignHostCuda = [&]() {
            deviceArray = hostArray;
        };
        auto copyAssignCudaHost = [&]() {
            hostArray = deviceArray;
        };
    
    #ifdef HAVE_CUDA
    
        benchmark.setOperation( "copy (operator=)", datasetSize );
    
        benchmark.time( reset1,
                        "CPU->GPU", copyAssignHostCuda,
                        "GPU->CPU", copyAssignCudaHost );
    
    
        auto setValueHost = [&]() {
            hostArray.setValue( 3.0 );
        };
        auto setValueCuda = [&]() {
            deviceArray.setValue( 3.0 );
        };
        benchmark.setOperation( "setValue", datasetSize );
    
        benchmark.time( reset1, "CPU", setValueHost );
    #ifdef HAVE_CUDA
        benchmark.time( reset1, "GPU", setValueCuda );
    #endif
    
    
    
        auto setSizeHost = [&]() {
            hostArray.setSize( size );
        };
        auto setSizeCuda = [&]() {
            deviceArray.setSize( size );
        };
        auto resetSize1 = [&]() {
            hostArray.reset();
    
    #ifdef HAVE_CUDA
    
        };
        benchmark.setOperation( "allocation (setSize)", datasetSize );
    
        benchmark.time( resetSize1, "CPU", setSizeHost );
    #ifdef HAVE_CUDA
        benchmark.time( resetSize1, "GPU", setSizeCuda );
    #endif
    
    
    
        auto resetSizeHost = [&]() {
            hostArray.reset();
        };
        auto resetSizeCuda = [&]() {
            deviceArray.reset();
        };
        auto setSize1 = [&]() {
            hostArray.setSize( size );
    
    #ifdef HAVE_CUDA
    
        };
        benchmark.setOperation( "deallocation (reset)", datasetSize );
    
        benchmark.time( setSize1, "CPU", resetSizeHost );
    #ifdef HAVE_CUDA
        benchmark.time( setSize1, "GPU", resetSizeCuda );
    #endif
    
        return true;