Skip to content
Snippets Groups Projects
ArrayOperationsCuda_impl.h 31.8 KiB
Newer Older
/***************************************************************************
                          ArrayOperationsCuda_impl.h  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once 

#include <iostream>
#include <TNL/tnlConfig.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Exceptions/CudaBadAlloc.h>
#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Containers/Algorithms/ReductionOperations.h>
namespace Containers {   

template< typename Element, typename Index >
ArrayOperations< Devices::Cuda >::
allocateMemory( Element*& data,
                const Index size )
   if( cudaMalloc( ( void** ) &data,
                   ( size_t ) size * sizeof( Element ) ) != cudaSuccess )
      throw Exceptions::CudaBadAlloc();
   }
   TNL_CHECK_CUDA_DEVICE;
   throw Exceptions::CudaSupportMissing();
#endif
}

template< typename Element >
ArrayOperations< Devices::Cuda >::
freeMemory( Element* data )
   TNL_ASSERT_TRUE( data, "Attempted to free a nullptr." );
   cudaFree( data );
   TNL_CHECK_CUDA_DEVICE;
   throw Exceptions::CudaSupportMissing();
#endif
}

template< typename Element >
ArrayOperations< Devices::Cuda >::
setMemoryElement( Element* data,
                  const Element& value )
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
   ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 );
}

template< typename Element >
ArrayOperations< Devices::Cuda >::
getMemoryElement( const Element* data )
   TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
   ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 );
   return result;
}


#ifdef HAVE_CUDA
template< typename Element, typename Index >
__global__ void
setArrayValueCudaKernel( Element* data,
                         const Index size,
                         const Element value )
{
   Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
   const Index maxGridSize = blockDim. x * gridDim. x;
   while( elementIdx < size )
   {
      data[ elementIdx ] = value;
      elementIdx += maxGridSize;
   }
}
#endif

template< typename Element, typename Index >
bool
ArrayOperations< Devices::Cuda >::
setMemory( Element* data,
           const Element& value,
           const Index size )
   TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
#ifdef HAVE_CUDA
   dim3 blockSize( 0 ), gridSize( 0 );
   blockSize. x = 256;
   Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
   gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
   setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value );
   return TNL_CHECK_CUDA_DEVICE;
   throw Exceptions::CudaSupportMissing();
#endif
}

#ifdef HAVE_CUDA
template< typename DestinationElement,
          typename SourceElement,
          typename Index >
__global__ void
copyMemoryCudaToCudaKernel( DestinationElement* destination,
                            const SourceElement* source,
                            const Index size )
{
   Index elementIdx = blockDim. x * blockIdx. x + threadIdx. x;
   const Index maxGridSize = blockDim. x * gridDim. x;
   while( elementIdx < size )
   {
      destination[ elementIdx ] = source[ elementIdx ];
      elementIdx += maxGridSize;
   }
}
#endif

template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool
ArrayOperations< Devices::Cuda >::
copyMemory( DestinationElement* destination,
            const SourceElement* source,
            const Index size )
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
#ifdef HAVE_CUDA
   if( std::is_same< DestinationElement, SourceElement >::value )
   {
      cudaMemcpy( destination,
                  source,
                  size * sizeof( DestinationElement ),
                  cudaMemcpyDeviceToDevice );
      return TNL_CHECK_CUDA_DEVICE;
   }
   else
   {
      dim3 blockSize( 0 ), gridSize( 0 );
      blockSize. x = 256;
      Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x );
      gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() );
      copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size );
      return TNL_CHECK_CUDA_DEVICE;
   }
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

template< typename Element1,
          typename Element2,
          typename Index >
bool
ArrayOperations< Devices::Cuda >::
compareMemory( const Element1* destination,
               const Element2* source,
               const Index size )
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
   //TODO: The parallel reduction on the CUDA device with different element types is needed.
   Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
   Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result );
}

template< typename Element,
          typename Index >
bool
ArrayOperations< Devices::Cuda >::
containsValue( const Element* data,
               const Index size,
               const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
   if( size == 0 ) return false;
   bool result = false;
   Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue;
   reductionContainsValue.setValue( value );
   Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, 0, result );
}

template< typename Element,
          typename Index >
bool
ArrayOperations< Devices::Cuda >::
containsOnlyValue( const Element* data,
                   const Index size,
                   const Element& value )
{
   TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "" );
   if( size == 0 ) return false;
   bool result = false;
   Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue;
   reductionContainsOnlyValue.setValue( value );
   Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, 0, result );
/****
 * Operations CUDA -> Host
 */
template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool
ArrayOperations< Devices::Host, Devices::Cuda >::
copyMemory( DestinationElement* destination,
            const SourceElement* source,
            const Index size )
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
#ifdef HAVE_CUDA
   if( std::is_same< DestinationElement, SourceElement >::value )
   {
      if( cudaMemcpy( destination,
                      source,
                      size * sizeof( DestinationElement ),
                      cudaMemcpyDeviceToHost ) != cudaSuccess )
         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
      return TNL_CHECK_CUDA_DEVICE;
      SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ];
      Index i( 0 );
      while( i < size )
      {
         if( cudaMemcpy( buffer,
                         &source[ i ],
                         min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ),
                         cudaMemcpyDeviceToHost ) != cudaSuccess )
         {
            delete[] buffer;
            std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
            return TNL_CHECK_CUDA_DEVICE;
         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
         {
            destination[ i + j ] = buffer[ j ];
            j++;
         }
         i += j;
      }
      delete[] buffer;
   }
   return true;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}


template< typename Element1,
          typename Element2,
          typename Index >
bool
ArrayOperations< Devices::Host, Devices::Cuda >::
compareMemory( const Element1* destination,
               const Element2* source,
               const Index size )
{
   /***
    * Here, destination is on host and source is on CUDA device.
    */
   TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
#ifdef HAVE_CUDA
   Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ];
   Index compared( 0 );
   while( compared < size )
   {
      Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() );
      if( cudaMemcpy( ( void* ) host_buffer,
                      ( void* ) & ( source[ compared ] ),
                      transfer * sizeof( Element2 ),
                      cudaMemcpyDeviceToHost ) != cudaSuccess )
      {
         delete[] host_buffer;
         std::cerr << "Transfer of data from CUDA device to host failed." << std::endl;
         return TNL_CHECK_CUDA_DEVICE;
      if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) )
      {
         delete[] host_buffer;
         return false;
      }
      compared += transfer;
   }
   delete[] host_buffer;
   return true;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

/****
 * Operations Host -> CUDA
 */
template< typename DestinationElement,
          typename SourceElement,
          typename Index >
bool
ArrayOperations< Devices::Cuda, Devices::Host >::
copyMemory( DestinationElement* destination,
            const SourceElement* source,
            const Index size )
   TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
   TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
#ifdef HAVE_CUDA
   if( std::is_same< DestinationElement, SourceElement >::value )
   {
      if( cudaMemcpy( destination,
                      source,
                      size * sizeof( DestinationElement ),
                      cudaMemcpyHostToDevice ) != cudaSuccess )
         std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
      return TNL_CHECK_CUDA_DEVICE;
      DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ];
      Index i( 0 );
      while( i < size )
      {
         Index j( 0 );
         while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size )
         {
            buffer[ j ] = source[ i + j ];
            j++;
         }
         if( cudaMemcpy( &destination[ i ],
                         buffer,
                         j * sizeof( DestinationElement ),
                         cudaMemcpyHostToDevice ) != cudaSuccess )
         {
            delete[] buffer;
            std::cerr << "Transfer of data from host to CUDA device failed." << std::endl;
            return TNL_CHECK_CUDA_DEVICE;
#else
   throw Exceptions::CudaSupportMissing();
#endif
}

template< typename Element1,
          typename Element2,
          typename Index >
bool
ArrayOperations< Devices::Cuda, Devices::Host >::
compareMemory( const Element1* hostData,
               const Element2* deviceData,
               const Index size )
   TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
   TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
   return ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory( deviceData, hostData, size );
}

#ifdef TEMPLATE_EXPLICIT_INSTANTIATION

extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        int >( char*& data, const int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         int >( int*& data, const int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    int >( long int*& data, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       int >( float*& data, const int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      int >( double*& data, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< char,        long int >( char*& data, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< int,         long int >( int*& data, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int,    long int >( long int*& data, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< float,       long int >( float*& data, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< double,      long int >( double*& data, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long int >( long double*& data, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< char        >( char* data );
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< int         >( int* data );
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< long int    >( long int* data );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< float       >( float* data );
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< double      >( double* data );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data );
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< char        >( char* data, const char& value );
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< int         >( int* data, const int& value );
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int    >( long int* data, const long int& value );
#ifdef INSTANTIATE_FLOAT
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< float       >( float* data, const float& value );
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< double      >( double* data, const double& value );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value );
extern template char        ArrayOperations< Devices::Cuda >::getMemoryElement< char        >( const char* data );
extern template int         ArrayOperations< Devices::Cuda >::getMemoryElement< int         >( const int* data );
extern template long int    ArrayOperations< Devices::Cuda >::getMemoryElement< long int    >( const long int* data );
#ifdef INSTANTIATE_FLOAT
extern template float       ArrayOperations< Devices::Cuda >::getMemoryElement< float       >( const float* data );
extern template double      ArrayOperations< Devices::Cuda >::getMemoryElement< double      >( const double* data );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, int >( char* destination, const char* source, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, int >( int* destination, const int* source, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, int >( long int* destination, const long int* source, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, int >( float* destination, const float* source, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, int >( double* destination, const double* source, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char,               char, long int >( char* destination, const char* source, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int,                 int, long int >( int* destination, const int* source, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int,       long int, long int >( long int* destination, const long int* source, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float,             float, long int >( float* destination, const float* source, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double,           double, long int >( double* destination, const double* source, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, int >( const char* data1, const char* data2, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, int >( const int* data1, const int* data2, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, int >( const long int* data1, const long int* data2, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, int >( const float* data1, const float* data2, const int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, int >( const double* data1, const double* data2, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char,               char, long int >( const char* data1, const char* data2, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int,                 int, long int >( const int* data1, const int* data2, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int,       long int, long int >( const long int* data1, const long int* data2, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float,             float, long int >( const float* data1, const float* data2, const long int size );
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double,           double, long int >( const double* data1, const double* data2, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< char,        int >( char* destination, const char& value, const int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< int,         int >( int* destination, const int& value, const int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    int >( long int* destination, const long int& value, const int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::setMemory< float,       int >( float* destination, const float& value, const int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< double,      int >( double* destination, const double& value, const int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size );
#endif

#ifdef INSTANTIATE_LONG_INT
extern template bool ArrayOperations< Devices::Cuda >::setMemory< char,        long int >( char* destination, const char& value, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< int,         long int >( int* destination, const int& value, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< long int,    long int >( long int* destination, const long int& value, const long int size );
#ifdef INSTANTIATE_FLOAT
extern template bool ArrayOperations< Devices::Cuda >::setMemory< float,       long int >( float* destination, const float& value, const long int size );
extern template bool ArrayOperations< Devices::Cuda >::setMemory< double,      long int >( double* destination, const double& value, const long int size );
#ifdef INSTANTIATE_LONG_DOUBLE
extern template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size );
} // namespace Containers