/***************************************************************************
                          MemoryOperationsCuda.hpp  -  description
                             -------------------
    begin                : Jul 16, 2013
    copyright            : (C) 2013 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */
#include <functional>   // std::logical_and, std::logical_or
#include <iostream>
#include <memory>       // std::unique_ptr
#include <stdexcept>    // std::length_error
#include <type_traits>  // std::remove_cv

#include <TNL/Algorithms/MemoryOperations.h>
#include <TNL/Algorithms/MultiDeviceMemoryOperations.h>
#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Algorithms/Reduction.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
namespace Algorithms {
template< typename Element >
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
setElement( Element* data,
const Element& value )
TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
#ifdef HAVE_CUDA
cudaMemcpy( ( void* ) data, ( void* ) &value, sizeof( Element ), cudaMemcpyHostToDevice );
TNL_CHECK_CUDA_DEVICE;
#else
throw Exceptions::CudaSupportMissing();
#endif
// TODO: For some reason the following does not work after adding
// #ifdef __CUDA_ARCH__ to Array::setElement and ArrayView::setElement.
// Probably it might be a problem with lambda function 'kernel' which
// nvcc probably does not handle properly.
//MemoryOperations< Devices::Cuda >::set( data, value, 1 );
Jakub Klinkovský
committed
}
template< typename Element >
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
getElement( const Element* data )
TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
Jakub Klinkovský
committed
Element result;
#ifdef HAVE_CUDA
cudaMemcpy( ( void* ) &result, ( void* ) data, sizeof( Element ), cudaMemcpyDeviceToHost );
TNL_CHECK_CUDA_DEVICE;
#else
throw Exceptions::CudaSupportMissing();
#endif
// TODO: For some reason the following does not work after adding
// #ifdef __CUDA_ARCH__ to Array::getElement and ArrayView::getElement
// Probably it might be a problem with lambda function 'kernel' which
// nvcc probably does not handle properly.
//MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 );
Jakub Klinkovský
committed
return result;
Jakub Klinkovský
committed
}
template< typename Element, typename Index >
Jakub Klinkovský
committed
void
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
set( Element* data,
const Element& value,
const Index size )
if( size == 0 ) return;
TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
Jakub Klinkovský
committed
auto kernel = [data, value] __cuda_callable__ ( Index i )
Jakub Klinkovský
committed
ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}
template< typename DestinationElement,
typename SourceElement,
typename Index >
Jakub Klinkovský
committed
void
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
copy( DestinationElement* destination,
const SourceElement* source,
const Index size )
if( size == 0 ) return;
Jakub Klinkovský
committed
TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
Jakub Klinkovský
committed
// our ParallelFor kernel is faster than cudaMemcpy
auto kernel = [destination, source] __cuda_callable__ ( Index i )
{
destination[ i ] = source[ i ];
};
ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
template< typename DestinationElement,
Jakub Klinkovský
committed
typename Index,
typename SourceIterator >
Tomáš Oberhuber
committed
void
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
Jakub Klinkovský
committed
copyFromIterator( DestinationElement* destination,
Index destinationSize,
SourceIterator first,
SourceIterator last )
Jakub Klinkovský
committed
using BaseType = typename std::remove_cv< DestinationElement >::type;
const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize );
std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] };
Jakub Klinkovský
committed
Index copiedElements = 0;
while( copiedElements < destinationSize && first != last ) {
Index i = 0;
while( i < buffer_size && first != last )
Jakub Klinkovský
committed
buffer[ i++ ] = *first++;
MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i );
copiedElements += i;
}
if( first != last )
throw std::length_error( "Source iterator is larger than the destination array." );
Jakub Klinkovský
committed
template< typename Element1,
typename Element2,
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
compare( const Element1* destination,
const Element2* source,
if( size == 0 ) return true;
Tomáš Oberhuber
committed
TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
Jakub Klinkovský
committed
Jakub Klinkovský
committed
auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
Tomáš Oberhuber
committed
template< typename Element,
typename Index >
bool
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
containsValue( const Element* data,
const Index size,
const Element& value )
{
if( size == 0 ) return false;
TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
Jakub Klinkovský
committed
TNL_ASSERT_GE( size, (Index) 0, "" );
Jakub Klinkovský
committed
auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false );
Tomáš Oberhuber
committed
template< typename Element,
typename Index >
bool
Jakub Klinkovský
committed
MemoryOperations< Devices::Cuda >::
containsOnlyValue( const Element* data,
const Index size,
const Element& value )
Tomáš Oberhuber
committed
{
if( size == 0 ) return false;
Tomáš Oberhuber
committed
TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
TNL_ASSERT_GE( size, 0, "" );
Jakub Klinkovský
committed
auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; };
return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true );
Tomáš Oberhuber
committed
}
} // namespace Algorithms