Commit 3e785f01 authored by Jakub Klinkovský

Merge branch 'ndarray' into 'develop'

NDArray

See merge request !18
parents 84b7a213 05634323
add_subdirectory( HeatEquation )
add_subdirectory( BLAS )
add_subdirectory( NDArray )
add_subdirectory( SpMV )
add_subdirectory( DistSpMV )
add_subdirectory( LinearSolvers )
......
add_executable( tnl-benchmark-ndarray tnl-benchmark-ndarray.cpp )
target_compile_options( tnl-benchmark-ndarray PRIVATE ${CXX_TESTS_FLAGS} )
install( TARGETS tnl-benchmark-ndarray RUNTIME DESTINATION bin )
add_executable( tnl-benchmark-ndarray-boundary tnl-benchmark-ndarray-boundary.cpp )
target_compile_options( tnl-benchmark-ndarray-boundary PRIVATE ${CXX_TESTS_FLAGS} )
install( TARGETS tnl-benchmark-ndarray-boundary RUNTIME DESTINATION bin )
if( BUILD_CUDA )
cuda_add_executable( tnl-benchmark-ndarray-cuda tnl-benchmark-ndarray-cuda.cu
OPTIONS ${CXX_TESTS_FLAGS} )
install( TARGETS tnl-benchmark-ndarray-cuda RUNTIME DESTINATION bin )
cuda_add_executable( tnl-benchmark-ndarray-boundary-cuda tnl-benchmark-ndarray-boundary-cuda.cu
OPTIONS ${CXX_TESTS_FLAGS} )
install( TARGETS tnl-benchmark-ndarray-boundary-cuda RUNTIME DESTINATION bin )
endif()
#include "tnl-benchmark-ndarray-boundary.h"
#include "tnl-benchmark-ndarray-boundary.h"
#include "tnl-benchmark-ndarray.h"
@@ -22,6 +22,42 @@ template< typename DestinationDevice,
typename SourceDevice = DestinationDevice >
struct ArrayOperations;
// TODO: establish the concept of a "void device" for static computations in the whole TNL
template<>
struct ArrayOperations< void >
{
template< typename Element >
__cuda_callable__
static void setElement( Element* data,
const Element& value );
template< typename Element >
__cuda_callable__
static Element getElement( const Element* data );
template< typename Element, typename Index >
__cuda_callable__
static void set( Element* data,
const Element& value,
const Index size );
template< typename DestinationElement,
typename SourceElement,
typename Index >
__cuda_callable__
static void copy( DestinationElement* destination,
const SourceElement* source,
const Index size );
template< typename Element1,
typename Element2,
typename Index >
__cuda_callable__
static bool compare( const Element1* destination,
const Element2* source,
const Index size );
};
template<>
struct ArrayOperations< Devices::Host >
{
@@ -251,6 +287,7 @@ struct ArrayOperations< Devices::Host, Devices::MIC >
} // namespace Containers
} // namespace TNL
#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsMIC.hpp>
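The new ArrayOperations< void > specialization collects the element-wise operations that need no device-specific launch logic, so statically sized containers can call them from both host and device code. A minimal usage sketch, assuming the headers added in this branch are on the include path (example() is an illustrative name, not part of TNL):

#include <TNL/Containers/Algorithms/ArrayOperations.h>

using TNL::Containers::Algorithms::ArrayOperations;

// fill, copy and compare small raw buffers; __cuda_callable__ makes this
// usable from both host and CUDA device code
__cuda_callable__
bool example()
{
   int src[ 4 ] = { 1, 2, 3, 4 };
   int dst[ 4 ];
   ArrayOperations< void >::set( dst, 0, 4 );                // fill dst with zeros
   ArrayOperations< void >::copy( dst, src, 4 );             // element-wise copy
   return ArrayOperations< void >::compare( dst, src, 4 );   // true after the copy
}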
/***************************************************************************
ArrayOperationsStatic.hpp - description
-------------------
begin : Apr 8, 2019
copyright : (C) 2019 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#pragma once
#include <TNL/Containers/Algorithms/ArrayOperations.h>
namespace TNL {
namespace Containers {
namespace Algorithms {
template< typename Element >
__cuda_callable__
void
ArrayOperations< void >::
setElement( Element* data,
const Element& value )
{
*data = value;
}
template< typename Element >
__cuda_callable__
Element
ArrayOperations< void >::
getElement( const Element* data )
{
return *data;
}
template< typename Element, typename Index >
__cuda_callable__
void
ArrayOperations< void >::
set( Element* data,
const Element& value,
const Index size )
{
for( Index i = 0; i < size; i++ )
data[ i ] = value;
}
template< typename DestinationElement,
typename SourceElement,
typename Index >
__cuda_callable__
void
ArrayOperations< void >::
copy( DestinationElement* destination,
const SourceElement* source,
const Index size )
{
for( Index i = 0; i < size; i++ )
destination[ i ] = source[ i ];
}
template< typename Element1,
typename Element2,
typename Index >
__cuda_callable__
bool
ArrayOperations< void >::
compare( const Element1* destination,
const Element2* source,
const Index size )
{
for( Index i = 0; i < size; i++ )
if( ! ( destination[ i ] == source[ i ] ) )
return false;
return true;
}
} // namespace Algorithms
} // namespace Containers
} // namespace TNL
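The loops above are intentionally plain and sequential: the specialization targets small, statically sized data, and the bodies must compile for both host and device, so no memcpy shortcuts or parallel kernels are involved. As a sketch of the intended use, a StaticArray-like wrapper could delegate its element-wise operations to this specialization (MiniStaticArray is a hypothetical illustration, not a class from this merge request):

#include <TNL/Containers/Algorithms/ArrayOperations.h>

// hypothetical static container delegating to ArrayOperations< void >
template< int Size, typename Value >
struct MiniStaticArray
{
   Value data[ Size ];

   __cuda_callable__
   void setValue( const Value& v )
   {
      TNL::Containers::Algorithms::ArrayOperations< void >::set( data, v, Size );
   }

   __cuda_callable__
   bool operator==( const MiniStaticArray& other ) const
   {
      return TNL::Containers::Algorithms::ArrayOperations< void >::compare( data, other.data, Size );
   }
};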
/***************************************************************************
NDArrayIndexer.h - description
-------------------
begin : Apr 14, 2019
copyright : (C) 2019 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include <TNL/Containers/ndarray/Indexing.h>
#include <TNL/Containers/ndarray/SizesHolderHelpers.h> // StorageSizeGetter
#include <TNL/Containers/ndarray/Subarrays.h> // DummyStrideBase
namespace TNL {
namespace Containers {
template< typename SizesHolder,
typename Permutation,
typename Base,
typename StridesHolder = __ndarray_impl::DummyStrideBase< typename SizesHolder::IndexType, SizesHolder::getDimension() >,
typename Overlaps = __ndarray_impl::make_constant_index_sequence< SizesHolder::getDimension(), 0 > >
class NDArrayIndexer
: public StridesHolder
{
public:
using IndexType = typename SizesHolder::IndexType;
using NDBaseType = Base;
using SizesHolderType = SizesHolder;
using StridesHolderType = StridesHolder;
using PermutationType = Permutation;
using OverlapsType = Overlaps;
static_assert( StridesHolder::getDimension() == SizesHolder::getDimension(),
"Dimension of strides does not match the dimension of sizes." );
static_assert( Permutation::size() == SizesHolder::getDimension(),
"Dimension of permutation does not match the dimension of sizes." );
static_assert( Overlaps::size() == SizesHolder::getDimension(),
"Dimension of overlaps does not match the dimension of sizes." );
__cuda_callable__
NDArrayIndexer() = default;
// explicit initialization by sizes and strides
__cuda_callable__
NDArrayIndexer( SizesHolder sizes, StridesHolder strides )
: StridesHolder( strides ), sizes( sizes ) {}
static constexpr std::size_t getDimension()
{
return SizesHolder::getDimension();
}
__cuda_callable__
const SizesHolderType& getSizes() const
{
return sizes;
}
template< std::size_t level >
__cuda_callable__
IndexType getSize() const
{
return sizes.template getSize< level >();
}
// method template from base class
using StridesHolder::getStride;
template< std::size_t level >
static constexpr std::size_t getOverlap()
{
return __ndarray_impl::get< level >( Overlaps{} );
}
// returns the product of the aligned sizes
__cuda_callable__
IndexType getStorageSize() const
{
using Alignment = typename Base::template Alignment< Permutation >;
return __ndarray_impl::StorageSizeGetter< SizesHolder, Alignment, Overlaps >::get( sizes );
}
template< typename... IndexTypes >
__cuda_callable__
IndexType
getStorageIndex( IndexTypes&&... indices ) const
{
static_assert( sizeof...( indices ) == SizesHolder::getDimension(), "got wrong number of indices" );
return Base::template getStorageIndex< Permutation, Overlaps >
( sizes,
static_cast< const StridesHolder& >( *this ),
std::forward< IndexTypes >( indices )... );
}
protected:
// non-const reference accessor cannot be public - only subclasses like NDArrayStorage may modify the sizes
__cuda_callable__
SizesHolderType& getSizes()
{
return sizes;
}
SizesHolder sizes;
};
} // namespace Containers
} // namespace TNL
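For intuition about getStorageIndex(): with a row-major base layout, the identity permutation, zero overlaps and no alignment padding, it reduces to the usual formula, e.g. for sizes I x J x K the element ( i, j, k ) is stored at ( i * J + j ) * K + k. A standalone sketch of that degenerate case (illustrative only; the real computation is dispatched through Base, Permutation and Overlaps):

#include <cstddef>

// row-major storage index of element ( i, j, k ) in an array with sizes I x J x K;
// note that the size of the slowest-varying axis (I) does not enter the formula
constexpr std::size_t rowMajorIndex( std::size_t J, std::size_t K,
                                     std::size_t i, std::size_t j, std::size_t k )
{
   return ( i * J + j ) * K + k;
}

static_assert( rowMajorIndex( 3, 4,  0, 0, 0 ) == 0,  "first element of a 2x3x4 array" );
static_assert( rowMajorIndex( 3, 4,  1, 2, 3 ) == 23, "last element of a 2x3x4 array" );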
/***************************************************************************
SynchronizerBuffers.h - description
-------------------
begin : Mar 30, 2019
copyright : (C) 2019 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include <TNL/Containers/NDArray.h>
namespace TNL {
namespace Containers {
namespace __ndarray_impl {
template< typename DistributedNDArray, std::size_t level >
struct SynchronizerBuffersLayer
{
SynchronizerBuffersLayer& getDimBuffers( std::integral_constant< std::size_t, level > )
{
return *this;
}
using NDArrayType = NDArray< typename DistributedNDArray::ValueType,
typename DistributedNDArray::SizesHolderType,
typename DistributedNDArray::PermutationType,
typename DistributedNDArray::DeviceType >;
NDArrayType left_send_buffer, left_recv_buffer, right_send_buffer, right_recv_buffer;
typename DistributedNDArray::LocalBeginsType left_send_offsets, left_recv_offsets, right_send_offsets, right_recv_offsets;
int left_neighbor = -1;
int right_neighbor = -1;
void reset()
{
left_send_buffer.reset();
left_recv_buffer.reset();
right_send_buffer.reset();
right_recv_buffer.reset();
left_send_offsets = left_recv_offsets = right_send_offsets = right_recv_offsets = typename DistributedNDArray::LocalBeginsType{};
left_neighbor = right_neighbor = -1;
}
};
template< typename DistributedNDArray,
typename LevelTag = std::integral_constant< std::size_t, DistributedNDArray::getDimension() > >
struct SynchronizerBuffersLayerHelper
{};
template< typename DistributedNDArray, std::size_t level >
struct SynchronizerBuffersLayerHelper< DistributedNDArray, std::integral_constant< std::size_t, level > >
: public SynchronizerBuffersLayerHelper< DistributedNDArray, std::integral_constant< std::size_t, level - 1 > >,
public SynchronizerBuffersLayer< DistributedNDArray, level >
{
using SynchronizerBuffersLayerHelper< DistributedNDArray, std::integral_constant< std::size_t, level - 1 > >::getDimBuffers;
using SynchronizerBuffersLayer< DistributedNDArray, level >::getDimBuffers;
};
template< typename DistributedNDArray >
struct SynchronizerBuffersLayerHelper< DistributedNDArray, std::integral_constant< std::size_t, 0 > >
: public SynchronizerBuffersLayer< DistributedNDArray, 0 >
{
using SynchronizerBuffersLayer< DistributedNDArray, 0 >::getDimBuffers;
};
template< typename DistributedNDArray >
struct SynchronizerBuffers
: public SynchronizerBuffersLayerHelper< DistributedNDArray >
{
using SynchronizerBuffersLayerHelper< DistributedNDArray >::getDimBuffers;
template< std::size_t level >
auto& getDimBuffers()
{
return this->getDimBuffers( std::integral_constant< std::size_t, level >{} );
}
};
} // namespace __ndarray_impl
} // namespace Containers
} // namespace TNL
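The getDimBuffers< level >() lookup relies on tag dispatch: the compile-time level is wrapped in a std::integral_constant, overload resolution across the recursively inherited layers picks the one for that dimension, and the chained using-declarations keep every overload visible in the most derived class. A stripped-down sketch of the same idiom (Layer/Layers are illustrative names, not TNL types):

#include <cstddef>
#include <iostream>
#include <type_traits>

// one "layer" per compile-time index; the tagged overload returns itself
template< std::size_t level >
struct Layer
{
   int payload = static_cast< int >( level );

   Layer& get( std::integral_constant< std::size_t, level > ) { return *this; }
};

// recursive helper inheriting one Layer per index and re-exporting every get() overload
template< std::size_t N >
struct Layers : public Layers< N - 1 >, public Layer< N - 1 >
{
   using Layers< N - 1 >::get;
   using Layer< N - 1 >::get;
};

template<>
struct Layers< 1 > : public Layer< 0 >
{
   using Layer< 0 >::get;
};

int main()
{
   Layers< 3 > buffers;   // contains Layer< 0 >, Layer< 1 > and Layer< 2 >
   // the integral_constant tag selects Layer< 1 >::get, so this prints 1
   std::cout << buffers.get( std::integral_constant< std::size_t, 1 >{} ).payload << std::endl;
}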
@@ -120,6 +120,7 @@ ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOpera
ADD_SUBDIRECTORY( Multimaps )
ADD_SUBDIRECTORY( ndarray )
if( ${BUILD_MPI} )
......
#include "DistributedNDArrayOverlaps_semi1D_test.h"