/***************************************************************************
                      MemoryOperationsCuda.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/
    
    /* See Copyright Notice in tnl/Copyright */
    
    
#include <functional>   // std::logical_and, std::logical_or (transparent specializations)
#include <iostream>
#include <memory>       // std::unique_ptr
#include <stdexcept>    // std::length_error
#include <type_traits>  // std::remove_cv

#include <TNL/Algorithms/MemoryOperations.h>
#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/Reduction.h>

#include <TNL/Exceptions/CudaSupportMissing.h>
    
    
    template< typename Element >
    
    setElement( Element* data,
                const Element& value )
    
       TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
    
    #ifdef HAVE_CUDA
       cudaMemcpy( ( void* ) data, ( void* ) &value, sizeof( Element ), cudaMemcpyHostToDevice );
    
    #else
       throw Exceptions::CudaSupportMissing();
    
    #endif
       // TODO: For some reason the following does not work after adding
       // #ifdef __CUDA_ARCH__ to Array::setElement and ArrayView::setElement.
       // Probably it might be a problem with lambda function 'kernel' which
       // nvcc probably does not handle properly.
       //MemoryOperations< Devices::Cuda >::set( data, value, 1 );
    
    
    template< typename Element >
    
    getElement( const Element* data )
    
       TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
    
    #ifdef HAVE_CUDA
       cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
    
    #else
       throw Exceptions::CudaSupportMissing();
    
    #endif
       // TODO: For some reason the following does not work after adding
       // #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement 
       // Probably it might be a problem with lambda function 'kernel' which
       // nvcc probably does not handle properly.
       //MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
    
    
    template< typename Element, typename Index >
    
    set( Element* data,
         const Element& value,
         const Index size )
    
       if( size == 0 ) return;
    
       TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
    
       auto kernel = [data, value] __cuda_callable__ ( Index i )
    
       ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
    
    }
    
    template< typename DestinationElement,
              typename SourceElement,
              typename Index >
    
    copy( DestinationElement* destination,
          const SourceElement* source,
          const Index size )
    
       if( size == 0 ) return;
    
       TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
       TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
    
    
       // our ParallelFor kernel is faster than cudaMemcpy
       auto kernel = [destination, source] __cuda_callable__ ( Index i )
       {
          destination[ i ] = source[ i ];
       };
       ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
    
    template< typename DestinationElement,
    
    copyFromIterator( DestinationElement* destination,
                      Index destinationSize,
                      SourceIterator first,
                      SourceIterator last )
    
       using BaseType = typename std::remove_cv< DestinationElement >::type;
    
       const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
       std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
    
       Index copiedElements = 0;
       while( copiedElements < destinationSize && first != last ) {
          Index i = 0;
    
          while( i < buffer_size && first != last )
    
             buffer[ i++ ] = *first++;
          MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
          copiedElements += i;
       }
       if( first != last )
          throw std::length_error( "Source iterator is larger than the destination array." );
    
    template< typename Element1,
              typename Element2,
    
    MemoryOperations< Devices::Cuda >::
    compare( const Element1* destination,
             const Element2* source,
    
       if( size == 0 ) return true;
    
       TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
       TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
    
       auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
       return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
    
    template< typename Element,
              typename Index >
    bool
    
    containsValue( const Element* data,
                   const Index size,
                   const Element& value )
    {
    
       if( size == 0 ) return false;
    
       TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
    
       auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
       return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
    
    containsOnlyValue( const Element* data,
                       const Index size,
                       const Element& value )
    
       if( size == 0 ) return false;
    
       TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
       TNL_ASSERT_GE( size, 0, "" );
    
       auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
       return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );