...
 
Commits (3)
......@@ -234,9 +234,10 @@ if( ${WITH_HIP} )
if(HIP_FOUND)
set(BUILD_HIP TRUE)
set(CMAKE_HIPCXX_FLAGS ${CMAKE_HIPCXX_FLAGS} -I${HIP_ROOT_DIR}/include -DHAVE_HIP -DHIP_PLATFORM=nvcc )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_ROOT_DIR}/include -DHAVE_HIP -I/usr/lib/llvm-8/include/openmp -L/usr/lib/llvm-8/lib")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_ROOT_DIR}/include -DHAVE_HIP -I/usr/lib/llvm-8/include/openmp -L/usr/lib/llvm-8/lib -lomp -fopenmp")
#set(CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS_INIT} -L/usr/lib/llvm-8/lib} )
set(CMAKE_CXX_COMPILER "${HIP_ROOT_DIR}/bin/hipcc" )
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L/usr/lib/llvm-8/lib -lomp")
endif()
endif()
......
......@@ -148,7 +148,7 @@ compare( const Element1* destination,
TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
auto fetch = [=] __device_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; };
return Reduction< Devices::Hip >::reduce( size, std::logical_and<>{}, fetch, true );
return Reduction< Devices::Hip >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true );
}
template< typename Element,
......@@ -164,7 +164,7 @@ containsValue( const Element* data,
TNL_ASSERT_GE( size, (Index) 0, "" );
auto fetch = [=] __device_callable__ ( Index i ) -> bool { return data[ i ] == value; };
return Reduction< Devices::Hip >::reduce( size, std::logical_or<>{}, fetch, false );
return Reduction< Devices::Hip >::reduce( ( Index) 0, size, std::logical_or<>{}, fetch, false );
}
template< typename Element,
......@@ -180,7 +180,7 @@ containsOnlyValue( const Element* data,
TNL_ASSERT_GE( size, 0, "" );
auto fetch = [=] __device_callable__ ( Index i ) -> bool { return data[ i ] == value; };
return Reduction< Devices::Hip >::reduce( size, std::logical_and<>{}, fetch, true );
return Reduction< Devices::Hip >::reduce( ( Index ) 0, size, std::logical_and<>{}, fetch, true );
}
} // namespace Algorithms
......
......@@ -15,6 +15,7 @@
#include <TNL/Devices/Sequential.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/Hip.h>
namespace TNL {
namespace Algorithms {
......@@ -277,6 +278,66 @@ struct Scan< Devices::Cuda, Type >
const typename Vector::RealType shift );
};
template< ScanType Type >
struct Scan< Devices::Hip, Type >
{
/**
 * \brief Computes scan (prefix sum) on a HIP GPU.
 *
 * \tparam Vector is the type of the vector to be scanned.
 * \tparam Reduction is a lambda function defining the reduction operation.
 *
 * \param v input vector, the result of scan is stored in the same vector
 * \param begin the first element in the array to be scanned
 * \param end the end of the scanned interval — exclusive, i.e. one past the
 *        last element (the implementation processes `end - begin` elements)
 * \param reduction lambda function implementing the reduction operation
 * \param zero is the idempotent element for the reduction operation, i.e. element which
 * does not change the result of the reduction.
 *
 * The reduction lambda function takes two variables which are supposed to be reduced:
 *
 * ```
 * auto reduction = [] __device_callable__ ( const Result& a, const Result& b ) { return ... };
 * ```
 *
 * \par Example
 *
 * \include ReductionAndScan/ScanExample.cpp
 *
 * \par Output
 *
 * \include ScanExample.out
 */
template< typename Vector,
typename Reduction >
static void
perform( Vector& v,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType zero );
// First phase of the two-phase (multi-grid) scan. Returns the intermediate
// result of HipScanKernelLauncher::performFirstPhase — presumably the
// per-block partial reductions consumed by performSecondPhase; confirm
// against HipScanKernel.h.
template< typename Vector,
typename Reduction >
static auto
performFirstPhase( Vector& v,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType zero );
// Second phase: applies the block shifts produced by performFirstPhase,
// offset by `shift`, to finalize the scan on [ begin, end ).
template< typename Vector,
typename BlockShifts,
typename Reduction >
static void
performSecondPhase( Vector& v,
const BlockShifts& blockShifts,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType shift );
};
template< ScanType Type >
struct SegmentedScan< Devices::Sequential, Type >
{
......@@ -411,6 +472,52 @@ struct SegmentedScan< Devices::Cuda, Type >
const typename Vector::RealType zero );
};
template< ScanType Type >
struct SegmentedScan< Devices::Hip, Type >
{
/**
 * \brief Computes segmented scan (prefix sum) on a HIP GPU.
 *
 * \tparam Vector is the type of the vector to be scanned.
 * \tparam Reduction is a lambda function defining the reduction operation.
 * \tparam Flags is an array type containing zeros and ones defining the beginning of the segments.
 *
 * \param v input vector, the result of scan is stored in the same vector
 * \param flags is an array with zeros and ones defining the beginning of the segments
 * \param begin the first element in the array to be scanned
 * \param end the end of the scanned interval (exclusive)
 * \param reduction lambda function implementing the reduction operation
 * \param zero is the idempotent element for the reduction operation, i.e. element which
 * does not change the result of the reduction.
 *
 * The reduction lambda function takes two variables which are supposed to be reduced:
 *
 * ```
 * auto reduction = [] __device_callable__ ( const Result& a, const Result& b ) { return ... };
 * ```
 *
 * \par Example
 *
 * \include ReductionAndScan/SegmentedScanExample.cpp
 *
 * \par Output
 *
 * \include SegmentedScanExample.out
 *
 * **Note: Segmented scan is not implemented for HIP yet.**
 */
template< typename Vector,
typename Reduction,
typename Flags >
static void
perform( Vector& v,
Flags& flags,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType zero );
};
} // namespace Algorithms
} // namespace TNL
......
......@@ -19,6 +19,8 @@
#include <TNL/Containers/StaticArray.h>
#include <TNL/Algorithms/CudaScanKernel.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Algorithms/HipScanKernel.h>
#include <TNL/Exceptions/HipSupportMissing.h>
#include <TNL/Exceptions/NotImplementedError.h>
namespace TNL {
......@@ -212,6 +214,8 @@ performSecondPhase( Vector& v,
#endif
}
/////
// CUDA specialization
template< ScanType Type >
template< typename Vector,
typename Reduction >
......@@ -292,6 +296,88 @@ performSecondPhase( Vector& v,
#endif
}
/////
// HIP specialization
//
// Performs the scan in place on the subinterval [ begin, end ) by delegating
// to HipScanKernelLauncher.
template< ScanType Type >
template< typename Vector,
typename Reduction >
void
Scan< Devices::Hip, Type >::
perform( Vector& v,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType zero )
{
#ifdef HAVE_HIP
using RealType = typename Vector::RealType;
using IndexType = typename Vector::IndexType;
// In-place scan: the same pointer is passed as both input and output.
HipScanKernelLauncher< Type, RealType, IndexType >::perform(
end - begin,
&v.getData()[ begin ], // input
&v.getData()[ begin ], // output
reduction,
zero );
#else
// Compiled without HIP support — the operation cannot be performed.
throw Exceptions::HipSupportMissing();
#endif
}
// First phase of the two-phase scan: scans [ begin, end ) in place and
// returns the launcher's intermediate result (presumably the per-block
// partial sums consumed by performSecondPhase — confirm in HipScanKernel.h).
template< ScanType Type >
template< typename Vector,
typename Reduction >
auto
Scan< Devices::Hip, Type >::
performFirstPhase( Vector& v,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType zero )
{
#ifdef HAVE_HIP
using RealType = typename Vector::RealType;
using IndexType = typename Vector::IndexType;
// In-place: the same pointer serves as input and output.
return HipScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase(
end - begin,
&v.getData()[ begin ], // input
&v.getData()[ begin ], // output
reduction,
zero );
#else
// Compiled without HIP support — the operation cannot be performed.
// NOTE(review): with the deduced (auto) return type this throw-only branch
// deduces void; verify it matches the declaration in Scan.h.
throw Exceptions::HipSupportMissing();
#endif
}
// Second phase of the two-phase scan: applies the block shifts computed by
// performFirstPhase, offset by `shift`, to the subinterval [ begin, end ).
template< ScanType Type >
template< typename Vector,
typename BlockShifts,
typename Reduction >
void
Scan< Devices::Hip, Type >::
performSecondPhase( Vector& v,
const BlockShifts& blockShifts,
const typename Vector::IndexType begin,
const typename Vector::IndexType end,
const Reduction& reduction,
const typename Vector::RealType shift )
{
#ifdef HAVE_HIP
using RealType = typename Vector::RealType;
using IndexType = typename Vector::IndexType;
// The result is written back into v; blockShifts is read-only here.
HipScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase(
end - begin,
&v.getData()[ begin ], // output
blockShifts.getData(),
reduction,
shift );
#else
// Compiled without HIP support — the operation cannot be performed.
throw Exceptions::HipSupportMissing();
#endif
}
template< ScanType Type >
template< typename Vector,
......
......@@ -113,6 +113,7 @@ template< typename Device,
typename IndexAllocator,
ElementsOrganization Organization,
int WarpSize >
__device_callable__
auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::
getSegmentsCount() const -> IndexType
{
......
......@@ -186,10 +186,7 @@ printDiagnosticsHost( const char* assertion,
}
#endif // TNL_THROW_ASSERTION_ERROR
//__device_callable__
#ifdef HAVE_HIP
__device__
#endif
__device_callable__
inline void
printDiagnosticsCuda( const char* assertion,
const char* message,
......@@ -215,6 +212,8 @@ fatalFailure()
// https://devtalk.nvidia.com/default/topic/509584/how-to-cancel-a-running-cuda-kernel-/
// TODO: it is reported as "illegal instruction", but that leads to an abort as well...
asm("trap;");
#elif defined __HIP_DEVICE_COMPILE__
//asm("s_trap;"); // TODO HIP: Find how to cancel HIP kernel
#else
throw EXIT_FAILURE;
#endif
......@@ -261,6 +260,11 @@ cmpHelperOpFailure( const char* assertion,
// to construct the dynamic error message
printDiagnosticsCuda( assertion, message, file, function, line,
"Not supported in CUDA kernels." );
#elif defined __HIP_DEVICE_COMPILE__
// diagnostics is not supported - we don't have the machinery
// to construct the dynamic error message
printDiagnosticsCuda( assertion, message, file, function, line,
"Not supported in HIP kernels." );
#else
const std::string formatted_lhs_value = Formatter< T1 >::printToString( lhs_value );
const std::string formatted_rhs_value = Formatter< T2 >::printToString( rhs_value );
......
......@@ -20,7 +20,7 @@ namespace TNL {
namespace Cuda {
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
ObjectType* passToDevice( const ObjectType& object )
{
#ifdef HAVE_CUDA
......@@ -45,7 +45,7 @@ ObjectType* passToDevice( const ObjectType& object )
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
ObjectType passFromDevice( const ObjectType* object )
{
#ifdef HAVE_CUDA
......@@ -62,7 +62,7 @@ ObjectType passFromDevice( const ObjectType* object )
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
void passFromDevice( const ObjectType* deviceObject,
ObjectType& hostObject )
{
......@@ -78,7 +78,7 @@ void passFromDevice( const ObjectType* deviceObject,
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
void freeFromDevice( ObjectType* deviceObject )
{
#ifdef HAVE_CUDA
......
......@@ -10,6 +10,10 @@
#pragma once
#ifdef HAVE_HIP
#include <hip/hip_runtime.h>
#endif
// The __device_callable__ macro has to be in a separate header file to avoid
// infinite loops by the #include directives.
......
......@@ -89,6 +89,7 @@ template< typename Real,
typename Index >
template< typename MeshEntity >
void
__device_callable__
tnlDirectEikonalMethodsBase< Meshes::Grid< 1, Real, Device, Index > >::
updateCell( MeshFunctionType& u,
const MeshEntity& cell,
......
......@@ -20,7 +20,7 @@ namespace TNL {
namespace Hip {
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
ObjectType* passToDevice( const ObjectType& object )
{
#ifdef HAVE_HIP
......@@ -45,7 +45,7 @@ ObjectType* passToDevice( const ObjectType& object )
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
ObjectType passFromDevice( const ObjectType* object )
{
#ifdef HAVE_HIP
......@@ -62,7 +62,7 @@ ObjectType passFromDevice( const ObjectType* object )
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
void passFromDevice( const ObjectType* deviceObject,
ObjectType& hostObject )
{
......@@ -78,7 +78,7 @@ void passFromDevice( const ObjectType* deviceObject,
}
template< typename ObjectType >
[[deprecated("Allocators and MemoryOperations hould be used instead.")]]
[[deprecated("Allocators and MemoryOperations should be used instead.")]]
void freeFromDevice( ObjectType* deviceObject )
{
#ifdef HAVE_HIP
......
......@@ -22,6 +22,7 @@ template< typename Real,
typename Device,
typename Index,
ElementsOrganization Organization >
__device_callable__
MultidiagonalMatrixView< Real, Device, Index, Organization >::
MultidiagonalMatrixView()
{
......@@ -31,6 +32,7 @@ template< typename Real,
typename Device,
typename Index,
ElementsOrganization Organization >
__device_callable__
MultidiagonalMatrixView< Real, Device, Index, Organization >::
MultidiagonalMatrixView( const ValuesViewType& values,
const DiagonalsOffsetsView& diagonalsOffsets,
......
......@@ -21,6 +21,7 @@ template< typename Real,
typename Device,
typename Index,
ElementsOrganization Organization >
__device_callable__
TridiagonalMatrixView< Real, Device, Index, Organization >::
TridiagonalMatrixView()
{
......@@ -30,6 +31,7 @@ template< typename Real,
typename Device,
typename Index,
ElementsOrganization Organization >
__device_callable__
TridiagonalMatrixView< Real, Device, Index, Organization >::
TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer )
: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer )
......
......@@ -78,7 +78,7 @@ using ViewTypes = ::testing::Types<
ArrayView< MyData, Devices::Host, long >
#endif
#ifdef HAVE_CUDA
ArrayView< int, Devices::Cuda, short >,
,ArrayView< int, Devices::Cuda, short >,
ArrayView< long, Devices::Cuda, short >,
ArrayView< float, Devices::Cuda, short >,
ArrayView< double, Devices::Cuda, short >,
......@@ -94,6 +94,24 @@ using ViewTypes = ::testing::Types<
ArrayView< double, Devices::Cuda, long >,
ArrayView< MyData, Devices::Cuda, long >
#endif
#ifdef HAVE_HIP
,ArrayView< int, Devices::Hip, short >,
ArrayView< long, Devices::Hip, short >,
ArrayView< float, Devices::Hip, short >,
ArrayView< double, Devices::Hip, short >,
ArrayView< MyData, Devices::Hip, short >,
ArrayView< int, Devices::Hip, int >,
ArrayView< long, Devices::Hip, int >,
ArrayView< float, Devices::Hip, int >,
ArrayView< double, Devices::Hip, int >,
ArrayView< MyData, Devices::Hip, int >,
ArrayView< int, Devices::Hip, long >,
ArrayView< long, Devices::Hip, long >,
ArrayView< float, Devices::Hip, long >,
ArrayView< double, Devices::Hip, long >,
ArrayView< MyData, Devices::Hip, long >
#endif
// all ArrayView tests should also work with VectorView
// (but we can't test all types because the argument list would be too long...)
......@@ -107,6 +125,11 @@ using ViewTypes = ::testing::Types<
VectorView< float, Devices::Cuda, long >,
VectorView< double, Devices::Cuda, long >
#endif
#ifdef HAVE_HIP
,
VectorView< float, Devices::Hip, long >,
VectorView< double, Devices::Hip, long >
#endif
>;
TYPED_TEST_SUITE( ArrayViewTest, ViewTypes );
......@@ -258,6 +281,31 @@ void testArrayViewElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u )
#endif
}
#ifdef HAVE_HIP
// HIP kernel: each thread writes its own index into the view, so after a
// launch with at least v.getSize() threads, v[ i ] == i for all i.
// The bounds check guards the extra threads of an oversized launch.
template< typename ValueType, typename IndexType >
__global__ void testSetGetElementKernel( ArrayView< ValueType, Devices::Hip, IndexType > v )
{
if( threadIdx.x < v.getSize() )
v[ threadIdx.x ] = threadIdx.x;
}
#endif /* HAVE_HIP */
// Verifies elementwise write access through an ArrayView on a HIP device:
// a kernel fills the view with v[ i ] = i and the result is checked on the
// host via getElement(). No-op when compiled without HIP support.
template< typename Value, typename Index >
void testArrayViewElementwiseAccess( Array< Value, Devices::Hip, Index >&& u )
{
#ifdef HAVE_HIP
using ViewType = ArrayView< Value, Devices::Hip, Index >;
u.setSize( 10 );
ViewType v( u );
// 16 threads cover all 10 elements; the kernel guards out-of-range indices.
testSetGetElementKernel<<< 1, 16 >>>( v );
TNL_CHECK_HIP_DEVICE;
for( int i = 0; i < 10; i++ ) {
EXPECT_EQ( u.getElement( i ), i );
}
#endif
}
TYPED_TEST( ArrayViewTest, elementwiseAccess )
{
using ArrayType = typename TestFixture::ArrayType;
......
......@@ -38,19 +38,19 @@ TARGET_LINK_LIBRARIES( VectorVerticalOperationsTest ${GTEST_BOTH_LIBRARIES} )
#TARGET_LINK_LIBRARIES( VectorOfStaticVectorsTest ${GTEST_BOTH_LIBRARIES} )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( ArrayTestCuda ArrayTest.cu
OPTIONS ${CXX_TESTS_FLAGS} )
CUDA_ADD_EXECUTABLE( ArrayTestCuda ArrayTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
TARGET_LINK_LIBRARIES( ArrayTestCuda ${GTEST_BOTH_LIBRARIES} )
CUDA_ADD_EXECUTABLE( ArrayViewTestCuda ArrayViewTest.cu
OPTIONS ${CXX_TESTS_FLAGS} )
CUDA_ADD_EXECUTABLE( ArrayViewTestCuda ArrayViewTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
TARGET_LINK_LIBRARIES( ArrayViewTestCuda ${GTEST_BOTH_LIBRARIES} )
CUDA_ADD_EXECUTABLE( VectorTestCuda VectorTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
CUDA_ADD_EXECUTABLE( VectorPrefixSumTestCuda VectorPrefixSumTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
CUDA_ADD_EXECUTABLE( VectorEvaluateAndReduceTestCuda VectorEvaluateAndReduceTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
TARGET_LINK_LIBRARIES( VectorTestCuda ${GTEST_BOTH_LIBRARIES} )
CUDA_ADD_EXECUTABLE( VectorPrefixSumTestCuda VectorPrefixSumTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
TARGET_LINK_LIBRARIES( VectorPrefixSumTestCuda ${GTEST_BOTH_LIBRARIES} )
CUDA_ADD_EXECUTABLE( VectorEvaluateAndReduceTestCuda VectorEvaluateAndReduceTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
TARGET_LINK_LIBRARIES( VectorEvaluateAndReduceTestCuda ${GTEST_BOTH_LIBRARIES} )
CUDA_ADD_EXECUTABLE( VectorBinaryOperationsTestCuda VectorBinaryOperationsTest.cu OPTIONS ${CXX_TESTS_FLAGS} )
......@@ -71,6 +71,35 @@ IF( BUILD_HIP )
# Build one HIP test executable per unit-test source: <name>Hip is compiled
# from <name>.cpp with the HIP compiler flags and linked against GoogleTest.
FOREACH( hip_test
         ArrayTest
         ArrayViewTest
         VectorTest
         VectorPrefixSumTest
         VectorEvaluateAndReduceTest
         VectorBinaryOperationsTest
         VectorUnaryOperationsTest
         VectorVerticalOperationsTest )
   HIP_ADD_EXECUTABLE( ${hip_test}Hip ${hip_test}.cpp )
   TARGET_COMPILE_OPTIONS( ${hip_test}Hip PRIVATE ${CMAKE_HIPCXX_FLAGS} ${CXX_TESTS_FLAGS} )
   TARGET_LINK_LIBRARIES( ${hip_test}Hip ${GTEST_BOTH_LIBRARIES} )
ENDFOREACH()
ENDIF( BUILD_HIP )
ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp )
......
......@@ -144,7 +144,7 @@ protected:
// types for which VectorBinaryOperationsTest is instantiated
#if defined(DISTRIBUTED_VECTOR)
using VectorPairs = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Pair< DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator >,
DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator > >,
Pair< DistributedVector< double, Devices::Host, int, Communicators::MpiCommunicator >,
......@@ -181,6 +181,7 @@ protected:
DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator > >
#endif
>;
// TODO HIP: Add tests for HIP distributed vectors when it is implemented
#elif defined(STATIC_VECTOR)
#ifdef VECTOR_OF_STATIC_VECTORS
using VectorPairs = ::testing::Types<
......@@ -207,21 +208,28 @@ protected:
#else
#ifdef VECTOR_OF_STATIC_VECTORS
using VectorPairs = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Pair< Vector< StaticVector< 3, double >, Devices::Host >, Vector< StaticVector< 3, double >, Devices::Host > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Host >, Vector< StaticVector< 3, double >, Devices::Host > >,
Pair< Vector< StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Host >, VectorView< StaticVector< 3, double >, Devices::Host > >
#else
#endif
#ifdef HAVE_CUDA
Pair< Vector< StaticVector< 3, double >, Devices::Cuda >, Vector< StaticVector< 3, double >, Devices::Cuda > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, Vector< StaticVector< 3, double >, Devices::Cuda > >,
Pair< Vector< StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Cuda >, VectorView< StaticVector< 3, double >, Devices::Cuda > >
#endif
#ifdef HAVE_HIP
// When the CUDA pairs above are also instantiated, a separating comma is
// required — otherwise the ::testing::Types list is ill-formed whenever
// HAVE_CUDA and HAVE_HIP are both defined.
#ifdef HAVE_CUDA
,
#endif
Pair< Vector< StaticVector< 3, double >, Devices::Hip >, Vector< StaticVector< 3, double >, Devices::Hip > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Hip >, Vector< StaticVector< 3, double >, Devices::Hip > >,
Pair< Vector< StaticVector< 3, double >, Devices::Hip >, VectorView< StaticVector< 3, double >, Devices::Hip > >,
Pair< VectorView< StaticVector< 3, double >, Devices::Hip >, VectorView< StaticVector< 3, double >, Devices::Hip > >
#endif
>;
#else
using VectorPairs = ::testing::Types<
#ifndef HAVE_CUDA
#if ! defined HAVE_CUDA && ! defined HAVE_HIP
Pair< Vector< int, Devices::Host >, Vector< int, Devices::Host > >,
Pair< VectorView< int, Devices::Host >, Vector< int, Devices::Host > >,
Pair< VectorView< const int, Devices::Host >, Vector< int, Devices::Host > >,
......@@ -235,7 +243,8 @@ protected:
Pair< VectorView< double, Devices::Host >, Vector< double, Devices::Host > >,
Pair< Vector< double, Devices::Host >, VectorView< double, Devices::Host > >,
Pair< VectorView< double, Devices::Host >, VectorView< double, Devices::Host > >
#else
#endif
#ifdef HAVE_CUDA
Pair< Vector< int, Devices::Cuda >, Vector< int, Devices::Cuda > >,
Pair< VectorView< int, Devices::Cuda >, Vector< int, Devices::Cuda > >,
Pair< VectorView< const int, Devices::Cuda >, Vector< int, Devices::Cuda > >,
......@@ -250,6 +259,21 @@ protected:
Pair< Vector< double, Devices::Cuda >, VectorView< double, Devices::Cuda > >,
Pair< VectorView< double, Devices::Cuda >, VectorView< double, Devices::Cuda > >
#endif
#ifdef HAVE_HIP
// Separator needed when the CUDA pairs above are also instantiated —
// without it the type list is ill-formed when HAVE_CUDA and HAVE_HIP
// are both defined.
#ifdef HAVE_CUDA
,
#endif
Pair< Vector< int, Devices::Hip >, Vector< int, Devices::Hip > >,
Pair< VectorView< int, Devices::Hip >, Vector< int, Devices::Hip > >,
Pair< VectorView< const int, Devices::Hip >, Vector< int, Devices::Hip > >,
Pair< Vector< int, Devices::Hip >, VectorView< int, Devices::Hip > >,
Pair< Vector< int, Devices::Hip >, VectorView< const int, Devices::Hip > >,
Pair< VectorView< int, Devices::Hip >, VectorView< int, Devices::Hip > >,
Pair< VectorView< const int, Devices::Hip >, VectorView< int, Devices::Hip > >,
Pair< VectorView< const int, Devices::Hip >, VectorView< const int, Devices::Hip > >,
Pair< VectorView< int, Devices::Hip >, VectorView< const int, Devices::Hip > >,
Pair< Vector< double, Devices::Hip >, Vector< double, Devices::Hip > >,
Pair< VectorView< double, Devices::Hip >, Vector< double, Devices::Hip > >,
Pair< Vector< double, Devices::Hip >, VectorView< double, Devices::Hip > >,
Pair< VectorView< double, Devices::Hip >, VectorView< double, Devices::Hip > >
#endif
>;
#endif
#endif
......
......@@ -135,6 +135,66 @@ TYPED_TEST( VectorTest, scan )
EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize();
#endif
}
////
// With HIP, perform tests with multiple HIP grids.
if( std::is_same< DeviceType, Devices::Hip >::value )
{
#ifdef HAVE_HIP
Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::maxGridSize() = 3;
setConstantSequence( v, 0 );
v_host = -1;
v.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
setConstantSequence( v, 1 );
v_host = -1;
v.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v_view;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
setLinearSequence( v );
v_host = -1;
v.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
// test views
setConstantSequence( v, 0 );
v_host = -1;
v_view.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
setConstantSequence( v, 1 );
v_host = -1;
v_view.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v_view;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i;
setLinearSequence( v );
v_host = -1;
v_view.scan();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i;
Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::resetMaxGridSize();
#endif
}
}
......@@ -260,11 +320,69 @@ TYPED_TEST( VectorTest, exclusiveScan )
Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize();
#endif
}
}
// TODO: test scan with custom begin and end parameters
////
// With HIP, perform tests with multiple HIP grids.
if( std::is_same< DeviceType, Devices::Hip >::value )
{
#ifdef HAVE_HIP
Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::maxGridSize() = 3;
setConstantSequence( v, 0 );
v_host = -1;
v.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
setConstantSequence( v, 1 );
v_host = -1;
v.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
setLinearSequence( v );
v_host = -1;
v.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
// test views
setConstantSequence( v, 0 );
v_host = -1;
v_view.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i;
setConstantSequence( v, 1 );
v_host = -1;
v_view.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], i ) << "i = " << i;
setLinearSequence( v );
v_host = -1;
v_view.template scan< Algorithms::ScanType::Exclusive >();
EXPECT_GT( ( Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 );
v_host = v;
for( int i = 0; i < size; i++ )
EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i;
Algorithms::HipScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::resetMaxGridSize();
#endif
}
}
// TODO: test scan with custom begin and end parameters
template< typename FlagsView >
void setupFlags( FlagsView& f )
{
......
......@@ -36,7 +36,7 @@ protected:
// types for which VectorTest is instantiated
// TODO: Quad must be fixed
using VectorTypes = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && ! defined HAVE_HIP
Vector< int, Devices::Host, short >,
Vector< long, Devices::Host, short >,
Vector< float, Devices::Host, short >,
......@@ -76,6 +76,26 @@ using VectorTypes = ::testing::Types<
//Vector< Quad< float >, Devices::Cuda, long >,
//Vector< Quad< double >, Devices::Cuda, long >
#endif
#ifdef HAVE_HIP
// Separator needed when the CUDA entries above are also instantiated —
// without it the type list is ill-formed when HAVE_CUDA and HAVE_HIP
// are both defined.
#ifdef HAVE_CUDA
,
#endif
Vector< int, Devices::Hip, short >,
Vector< long, Devices::Hip, short >,
Vector< float, Devices::Hip, short >,
Vector< double, Devices::Hip, short >,
//Vector< Quad< float >, Devices::Hip, short >,
//Vector< Quad< double >, Devices::Hip, short >,
Vector< int, Devices::Hip, int >,
Vector< long, Devices::Hip, int >,
Vector< float, Devices::Hip, int >,
Vector< double, Devices::Hip, int >,
//Vector< Quad< float >, Devices::Hip, int >,
//Vector< Quad< double >, Devices::Hip, int >,
Vector< int, Devices::Hip, long >,
Vector< long, Devices::Hip, long >,
Vector< float, Devices::Hip, long >,
Vector< double, Devices::Hip, long >
//Vector< Quad< float >, Devices::Hip, long >,
//Vector< Quad< double >, Devices::Hip, long >
#endif
>;
TYPED_TEST_SUITE( VectorTest, VectorTypes );
......
......@@ -82,6 +82,7 @@ protected:
DistributedVectorView< double, Devices::Cuda, int, Communicators::NoDistrCommunicator >,
DistributedVectorView< const double, Devices::Cuda, int, Communicators::NoDistrCommunicator >
#endif
// TODO HIP: Add tests for distributed vectors with HIP - it is not implemented yet
>;
#elif defined(STATIC_VECTOR)
#ifdef VECTOR_OF_STATIC_VECTORS
......@@ -109,17 +110,22 @@ protected:
#else
#ifdef VECTOR_OF_STATIC_VECTORS
using VectorTypes = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Vector< StaticVector< 3, double >, Devices::Host >,
VectorView< StaticVector< 3, double >, Devices::Host >
#else
#endif
#ifdef HAVE_CUDA
Vector< StaticVector< 3, double >, Devices::Cuda >,
VectorView< StaticVector< 3, double >, Devices::Cuda >
#endif
#ifdef HAVE_HIP
// Separating comma required when the CUDA entries above are also
// instantiated (HAVE_CUDA and HAVE_HIP both defined).
#ifdef HAVE_CUDA
,
#endif
Vector< StaticVector< 3, double >, Devices::Hip >,
VectorView< StaticVector< 3, double >, Devices::Hip >
#endif
>;
#else
using VectorTypes = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Vector< int, Devices::Host >,
VectorView< int, Devices::Host >,
VectorView< const int, Devices::Host >,
......@@ -133,6 +139,13 @@ protected:
Vector< double, Devices::Cuda >,
VectorView< double, Devices::Cuda >
#endif
#ifdef HAVE_HIP
// Separating comma required when the CUDA entries above are also
// instantiated (HAVE_CUDA and HAVE_HIP both defined).
#ifdef HAVE_CUDA
,
#endif
Vector< int, Devices::Hip >,
VectorView< int, Devices::Hip >,
VectorView< const int, Devices::Hip >,
Vector< double, Devices::Hip >,
VectorView< double, Devices::Hip >
#endif
>;
#endif
#endif
......
......@@ -27,6 +27,10 @@
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/VectorView.h>
#endif
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/Hip.h>
#include "VectorHelperFunctions.h"
......@@ -145,17 +149,22 @@ protected:
#else
#ifdef VECTOR_OF_STATIC_VECTORS
using VectorTypes = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Vector< StaticVector< 3, double >, Devices::Host >,
VectorView< StaticVector< 3, double >, Devices::Host >
#else
#endif
#ifdef HAVE_CUDA
Vector< StaticVector< 3, double >, Devices::Cuda >,
VectorView< StaticVector< 3, double >, Devices::Cuda >
#endif
#ifdef HAVE_HIP
// Separating comma required when the CUDA entries above are also
// instantiated (HAVE_CUDA and HAVE_HIP both defined).
#ifdef HAVE_CUDA
,
#endif
Vector< StaticVector< 3, double >, Devices::Hip >,
VectorView< StaticVector< 3, double >, Devices::Hip >
#endif
>;
#else
using VectorTypes = ::testing::Types<
#ifndef HAVE_CUDA
#if !defined HAVE_CUDA && !defined HAVE_HIP
Vector< int, Devices::Host >,
VectorView< int, Devices::Host >,
VectorView< const int, Devices::Host >,
......@@ -169,6 +178,13 @@ protected:
Vector< double, Devices::Cuda >,
VectorView< double, Devices::Cuda >
#endif
#ifdef HAVE_HIP
// Separating comma required when the CUDA entries above are also
// instantiated (HAVE_CUDA and HAVE_HIP both defined).
#ifdef HAVE_CUDA
,
#endif
Vector< int, Devices::Hip >,
VectorView< int, Devices::Hip >,
VectorView< const int, Devices::Hip >,
Vector< double, Devices::Hip >,
VectorView< double, Devices::Hip >
#endif
>;
#endif
#endif
......