/***************************************************************************
                          array-operations.h  -  description
                             -------------------
    begin                : Dec 30, 2015
    copyright            : (C) 2015 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

// Implemented by: Jakub Klinkovsky

#pragma once

#include "benchmarks.h"

#include <TNL/Containers/Array.h>

namespace TNL
{
namespace benchmarks
{

template< typename Real = double,
          typename Index = int >
bool
benchmarkArrayOperations( Benchmark & benchmark,
                          const int & loops,
                          const long & size )
{
    typedef Containers::Array< Real, Devices::Host, Index > HostArray;
    typedef Containers::Array< Real, Devices::Cuda, Index > CudaArray;
    using namespace std;

    double datasetSize = ( double ) ( loops * size ) * sizeof( Real ) / oneGB;

    HostArray hostArray, hostArray2;
    CudaArray deviceArray, deviceArray2;
    if( ! hostArray.setSize( size ) ||
        ! hostArray2.setSize( size )
#ifdef HAVE_CUDA
        ||
        ! deviceArray.setSize( size ) ||
        ! deviceArray2.setSize( size )
#endif
    )

    {
        const char* msg = "error: allocation of arrays failed";
        std::cerr << msg << std::endl;
        benchmark.addErrorMessage( msg );
        return false;
    }

    Real resultHost, resultDevice;


    // reset functions
    auto reset1 = [&]() {
        hostArray.setValue( 1.0 );
#ifdef HAVE_CUDA
        deviceArray.setValue( 1.0 );
#endif
    };
    auto reset2 = [&]() {
        hostArray2.setValue( 1.0 );
#ifdef HAVE_CUDA
        deviceArray2.setValue( 1.0 );
#endif
    };
    auto reset12 = [&]() {
        reset1();
        reset2();
    };


    reset12();


    auto compareHost = [&]() {
        resultHost = (int) hostArray == hostArray2;
    };
    auto compareCuda = [&]() {
        resultDevice = (int) deviceArray == deviceArray2;
    };
    benchmark.setOperation( "comparison (operator==)", 2 * datasetSize );
    benchmark.time( reset1, "CPU", compareHost );
#ifdef HAVE_CUDA
    benchmark.time( reset1, "GPU", compareCuda );
#endif


    auto copyAssignHostHost = [&]() {
        hostArray = hostArray2;
    };
    auto copyAssignCudaCuda = [&]() {
        deviceArray = deviceArray2;
    };
    benchmark.setOperation( "copy (operator=)", 2 * datasetSize );
    benchmark.time( reset1, "CPU", copyAssignHostHost );
#ifdef HAVE_CUDA
    benchmark.time( reset1, "GPU", copyAssignCudaCuda );
#endif


    auto copyAssignHostCuda = [&]() {
        deviceArray = hostArray;
    };
    auto copyAssignCudaHost = [&]() {
        hostArray = deviceArray;
    };
#ifdef HAVE_CUDA
    benchmark.setOperation( "copy (operator=)", datasetSize );
    benchmark.time( reset1,
                    "CPU->GPU", copyAssignHostCuda,
                    "GPU->CPU", copyAssignCudaHost );
#endif


    auto setValueHost = [&]() {
        hostArray.setValue( 3.0 );
    };
    auto setValueCuda = [&]() {
        deviceArray.setValue( 3.0 );
    };
    benchmark.setOperation( "setValue", datasetSize );
    benchmark.time( reset1, "CPU", setValueHost );
#ifdef HAVE_CUDA
    benchmark.time( reset1, "GPU", setValueCuda );
#endif


    auto setSizeHost = [&]() {
        hostArray.setSize( size );
    };
    auto setSizeCuda = [&]() {
        deviceArray.setSize( size );
    };
    auto resetSize1 = [&]() {
        hostArray.reset();
#ifdef HAVE_CUDA
        deviceArray.reset();
#endif
    };
    benchmark.setOperation( "allocation (setSize)", datasetSize );
    benchmark.time( resetSize1, "CPU", setSizeHost );
#ifdef HAVE_CUDA
    benchmark.time( resetSize1, "GPU", setSizeCuda );
#endif


    auto resetSizeHost = [&]() {
        hostArray.reset();
    };
    auto resetSizeCuda = [&]() {
        deviceArray.reset();
    };
    auto setSize1 = [&]() {
        hostArray.setSize( size );
#ifdef HAVE_CUDA
        deviceArray.setSize( size );
#endif
    };
    benchmark.setOperation( "deallocation (reset)", datasetSize );
    benchmark.time( setSize1, "CPU", resetSizeHost );
#ifdef HAVE_CUDA
    benchmark.time( setSize1, "GPU", resetSizeCuda );
#endif

    return true;
}

} // namespace benchmarks
} // namespace tnl