Commit 9fe43e5e authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Re-formatted source files under src/TNL/ with clang-format

parent 2d802491
......@@ -9,7 +9,7 @@
#pragma once
#ifdef HAVE_CUDA
#include <cuda.h>
#include <cuda.h>
#endif
#include <TNL/Devices/Sequential.h>
#include <TNL/Devices/Host.h>
......@@ -19,7 +19,8 @@ namespace TNL {
namespace Algorithms {
// Primary template; only the device specializations below provide operations.
template< typename Device >
struct AtomicOperations
{};
template<>
struct AtomicOperations< Devices::Host >
{
   TNL_NVCC_HD_WARNING_DISABLE
   //! \brief Atomically adds \e a to \e v using an OpenMP atomic update.
   template< typename Value >
   __cuda_callable__
   static void
   add( Value& v, const Value& a )
   {
#pragma omp atomic update
      v += a;
   }
};
......@@ -46,7 +48,8 @@ struct AtomicOperations< Devices::Sequential >
TNL_NVCC_HD_WARNING_DISABLE
template< typename Value >
__cuda_callable__
static void add( Value& v, const Value& a )
static void
add( Value& v, const Value& a )
{
v += a;
}
......@@ -57,54 +60,56 @@ struct AtomicOperations< Devices::Cuda >
{
   //! \brief Atomic addition for value types natively supported by CUDA's atomicAdd.
   template< typename Value >
   __cuda_callable__
   static void
   add( Value& v, const Value& a )
   {
#ifdef HAVE_CUDA
      atomicAdd( &v, a );
#endif  // HAVE_CUDA
   }

#ifdef HAVE_CUDA
   //! \brief Atomic addition for double.
   __device__
   static void
   add( double& v, const double& a )
   {
#if __CUDA_ARCH__ < 600
      // atomicAdd( double*, double ) requires compute capability 6.0 or higher,
      // so emulate it with a compare-and-swap loop on older architectures.
      unsigned long long int* v_as_ull = (unsigned long long int*) &v;
      unsigned long long int old = *v_as_ull, assumed;
      do {
         assumed = old;
         old = atomicCAS( v_as_ull, assumed, __double_as_longlong( a + __longlong_as_double( assumed ) ) );
         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
      } while( assumed != old );
#else  // __CUDA_ARCH__ < 600
      atomicAdd( &v, a );
#endif  //__CUDA_ARCH__ < 600
   }
#else  // HAVE_CUDA
   // No-op fallback so the overload set is complete when CUDA is disabled.
   static void
   add( double& v, const double& a )
   {}
#endif  // HAVE_CUDA

   //! \brief Atomic addition for long int is not available on CUDA - asserts at runtime.
   __cuda_callable__
   static void
   add( long int& v, const long int& a )
   {
#ifdef HAVE_CUDA
      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
#endif  // HAVE_CUDA
   }

   //! \brief Atomic addition for short int is not available on CUDA - asserts at runtime.
   __cuda_callable__
   static void
   add( short int& v, const short int& a )
   {
#ifdef HAVE_CUDA
      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
#endif  // HAVE_CUDA
   }
};
}  // namespace Algorithms
}  // namespace TNL
......@@ -19,72 +19,76 @@ namespace Algorithms {
// Process-wide singleton holding a reusable device buffer for reduction results,
// so repeated reductions do not pay for cudaMalloc/cudaFree on every call.
class CudaReductionBuffer
{
public:
   //! \brief Meyers-singleton accessor.
   inline static CudaReductionBuffer&
   getInstance()
   {
      static CudaReductionBuffer instance;
      return instance;
   }

   //! \brief Grows the device buffer to at least \e size bytes; never shrinks.
   //! Throws Exceptions::CudaBadAlloc on allocation failure and
   //! Exceptions::CudaSupportMissing when compiled without CUDA.
   inline void
   setSize( size_t size )
   {
#ifdef HAVE_CUDA
      if( size > this->size ) {
         this->free();
         if( cudaMalloc( (void**) &this->data, size ) != cudaSuccess ) {
            this->data = 0;
            throw Exceptions::CudaBadAlloc();
         }
         this->size = size;
      }
#else
      throw Exceptions::CudaSupportMissing();
#endif
   }

   //! \brief Returns the buffer reinterpreted as a pointer to \e Type.
   template< typename Type >
   Type*
   getData()
   {
      return (Type*) this->data;
   }

private:
   // stop the compiler generating methods of copy the object
   CudaReductionBuffer( CudaReductionBuffer const& copy );  // Not Implemented
   CudaReductionBuffer&
   operator=( CudaReductionBuffer const& copy );  // Not Implemented

   // private constructor of the singleton; registers cleanup at process exit
   inline CudaReductionBuffer( size_t size = 0 )
   {
#ifdef HAVE_CUDA
      setSize( size );
      atexit( CudaReductionBuffer::free_atexit );
#endif
   }

   inline static void
   free_atexit( void )
   {
      CudaReductionBuffer::getInstance().free();
   }

protected:
   //! \brief Releases the device buffer (no-op when nothing is allocated).
   inline void
   free( void )
   {
#ifdef HAVE_CUDA
      if( data ) {
         cudaFree( data );
         data = nullptr;
         TNL_CHECK_CUDA_DEVICE;
      }
#endif
   }

   void* data = nullptr;

   size_t size = 0;
};
}  // namespace Algorithms
}  // namespace TNL
......@@ -22,182 +22,149 @@ struct MemoryOperations< Devices::Sequential >
{
   template< typename Element, typename Index >
   __cuda_callable__
   static void
   construct( Element* data, const Index size );

   // note that args are passed by reference to the constructor, not via
   // std::forward since move-semantics does not apply for the construction of
   // multiple elements
   template< typename Element, typename Index, typename... Args >
   __cuda_callable__
   static void
   construct( Element* data, const Index size, const Args&... args );

   template< typename Element, typename Index >
   __cuda_callable__
   static void
   destruct( Element* data, const Index size );

   template< typename Element >
   __cuda_callable__
   static void
   setElement( Element* data, const Element& value );

   template< typename Element >
   __cuda_callable__
   static Element
   getElement( const Element* data );

   template< typename Element, typename Index >
   __cuda_callable__
   static void
   set( Element* data, const Element& value, const Index size );

   template< typename DestinationElement, typename SourceElement, typename Index >
   __cuda_callable__
   static void
   copy( DestinationElement* destination, const SourceElement* source, const Index size );

   // not __cuda_callable__ - presumably because a generic SourceIterator
   // cannot be used in device code (TODO confirm)
   template< typename DestinationElement, typename Index, typename SourceIterator >
   static void
   copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, SourceIterator last );

   template< typename Element1, typename Element2, typename Index >
   __cuda_callable__
   static bool
   compare( const Element1* destination, const Element2* source, const Index size );
};
template<>
struct MemoryOperations< Devices::Host >
{
   template< typename Element, typename Index >
   static void
   construct( Element* data, const Index size );

   // note that args are passed by reference to the constructor, not via
   // std::forward since move-semantics does not apply for the construction of
   // multiple elements
   template< typename Element, typename Index, typename... Args >
   static void
   construct( Element* data, const Index size, const Args&... args );

   template< typename Element, typename Index >
   static void
   destruct( Element* data, const Index size );

   // this is __cuda_callable__ only to silence nvcc warnings
   TNL_NVCC_HD_WARNING_DISABLE
   template< typename Element >
   __cuda_callable__
   static void
   setElement( Element* data, const Element& value );

   // this is __cuda_callable__ only to silence nvcc warnings
   TNL_NVCC_HD_WARNING_DISABLE
   template< typename Element >
   __cuda_callable__
   static Element
   getElement( const Element* data );

   template< typename Element, typename Index >
   static void
   set( Element* data, const Element& value, const Index size );

   template< typename DestinationElement, typename SourceElement, typename Index >
   static void
   copy( DestinationElement* destination, const SourceElement* source, const Index size );

   template< typename DestinationElement, typename Index, typename SourceIterator >
   static void
   copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, SourceIterator last );

   template< typename Element1, typename Element2, typename Index >
   static bool
   compare( const Element1* destination, const Element2* source, const Index size );
};
template<>
struct MemoryOperations< Devices::Cuda >
{
   template< typename Element, typename Index >
   static void
   construct( Element* data, const Index size );

   // note that args are passed by value to the constructor, not via
   // std::forward or even by reference, since move-semantics does not apply for
   // the construction of multiple elements and pass-by-reference cannot be used
   // with CUDA kernels
   template< typename Element, typename Index, typename... Args >
   static void
   construct( Element* data, const Index size, const Args&... args );

   template< typename Element, typename Index >
   static void
   destruct( Element* data, const Index size );

   template< typename Element >
   __cuda_callable__
   static void
   setElement( Element* data, const Element& value );

   template< typename Element >
   __cuda_callable__
   static Element
   getElement( const Element* data );

   template< typename Element, typename Index >
   static void
   set( Element* data, const Element& value, const Index size );

   template< typename DestinationElement, typename SourceElement, typename Index >
   static void
   copy( DestinationElement* destination, const SourceElement* source, const Index size );

   template< typename DestinationElement, typename Index, typename SourceIterator >
   static void
   copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, SourceIterator last );

   template< typename Element1, typename Element2, typename Index >
   static bool
   compare( const Element1* destination, const Element2* source, const Index size );
};
}  // namespace Algorithms
}  // namespace TNL
#include <TNL/Algorithms/MemoryOperationsSequential.hpp>
#include <TNL/Algorithms/MemoryOperationsHost.hpp>
......
......@@ -21,59 +21,51 @@ namespace Algorithms {
// Default-constructs \e size elements of type \e Element in the pre-allocated
// device storage pointed to by \e data, one element per CUDA thread.
template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::construct( Element* data, const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );
   auto kernel = [ data ] __cuda_callable__( Index i )
   {
      // placement-new
      ::new( (void*) ( data + i ) ) Element();
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}
// Constructs \e size elements of type \e Element in device storage, forwarding
// \e args to each element's constructor, one element per CUDA thread.
template< typename Element, typename Index, typename... Args >
void
MemoryOperations< Devices::Cuda >::construct( Element* data, const Index size, const Args&... args )
{
   TNL_ASSERT_TRUE( data, "Attempted to create elements through a nullptr." );
   // NOTE: nvcc does not allow __cuda_callable__ lambdas with a variadic capture
   auto kernel = [ data ] __cuda_callable__( Index i, Args... args )
   {
      // placement-new
      // (note that args are passed by value to the constructor, not via
      // std::forward or even by reference, since move-semantics does not apply for
      // the construction of multiple elements and pass-by-reference cannot be used
      // with CUDA kernels)
      ::new( (void*) ( data + i ) ) Element( args... );
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel, args... );
}
// Destroys \e size elements of type \e Element in device storage by invoking
// the destructor explicitly; the storage itself is released elsewhere.
template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::destruct( Element* data, const Index size )
{
   TNL_ASSERT_TRUE( data, "Attempted to destroy data through a nullptr." );
   auto kernel = [ data ] __cuda_callable__( Index i )
   {
      ( data + i )->~Element();
   };
   ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel );
}
template< typename Element >
__cuda_callable__ void
MemoryOperations< Devices::Cuda >::
setElement( Element* data,
const Element& value )
__cuda_callable__
void
MemoryOperations< Devices::Cuda >::setElement( Element* data, const Element& value )
{
TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
#ifdef __CUDA_ARCH__
......@@ -88,9 +80,9 @@ setElement( Element* data,
}
template< typename Element >
__cuda_callable__ Element
MemoryOperations< Devices::Cuda >::
getElement( const Element* data )
__cuda_callable__
Element
MemoryOperations< Devices::Cuda >::getElement( const Element* data )
{
TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
#ifdef __CUDA_ARCH__
......@@ -104,53 +96,44 @@ getElement( const Element* data )
template< typename Element, typename Index >
void
MemoryOperations< Devices::Cuda >::
set( Element* data,
const Element& value,
const Index size )
MemoryOperations< Devices::Cuda >::set( Element* data, const Element& value, const Index