Skip to content
Snippets Groups Projects
Commit 2b7be771 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Implementing BLAS benchmark.

parent 51642ea9
No related branches found
No related tags found
1 merge request!32Expression templates 2
......@@ -320,7 +320,7 @@ endif()
find_package( BLAS )
if( BLAS_FOUND )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_BLAS" )
#message( "BLAS: ${BLAS_LIBRARIES}" )
set( HAVE_BLAS TRUE)
endif()
#if( BUILD_MPI )
# FIND_PATH( PETSC_INCLUDE_DIR petsc.h
......@@ -455,3 +455,4 @@ message( " CMAKE_SHARED_LINKER_FLAGS_DEBUG = ${CMAKE_SHARED_LINKER_FLAGS_DEBUG
message( " CMAKE_SHARED_LINKER_FLAGS_RELEASE = ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}" )
message( " CUDA_NVCC_FLAGS = ${CUDA_NVCC_FLAGS}" )
message( " GMP_LIBRARIES = ${GMP_LIBRARIES}" )
message( " BLAS_libraries = ${BLAS_LIBRARIES}" )
......@@ -4,6 +4,8 @@ if( BUILD_CUDA )
else()
ADD_EXECUTABLE( tnl-benchmark-blas tnl-benchmark-blas.cpp )
endif()
target_link_libraries( tnl-benchmark-blas ${BLAS_LIBRARIES} )
if( HAVE_BLAS )
target_link_libraries( tnl-benchmark-blas ${BLAS_LIBRARIES} )
endif()
install( TARGETS tnl-benchmark-blas RUNTIME DESTINATION bin )
#pragma once
#ifdef HAVE_CUDA
#ifdef HAVE_BLAS
#include <cblas.h>
......@@ -17,15 +15,15 @@ inline int blasIgamax( int n, const double *x, int incx )
}
inline int blasIgamin( int n, const float *x, int incx )
/*inline int blasIgamin( int n, const float *x, int incx )
{
return cblas_Isamin( n, x, incx );
return cblas_isamin( n, x, incx );
}
inline int blasIgamin( int n, const double *x, int incx )
{
return cblas_Idamin( n, x, incx );
}
return cblas_idamin( n, x, incx );
}*/
inline float blasGasum( int n, const float *x, int incx )
......@@ -39,73 +37,54 @@ inline double blasGasum( int n, const double *x, int incx )
}
inline void
blasGaxpy( int n, const float *alpha,
const float *x, int incx,
float *y, int incy )
inline void blasGaxpy( int n, const float alpha,
const float *x, int incx,
float *y, int incy )
{
cblas_saxpy( n, alpha, x, incx, y, incy );
}
inline blasStatus_t
blasGaxpy( blasHandle_t int n,
const double *alpha,
const double *x, int incx,
double *y, int incy )
inline void blasGaxpy( int n, const double alpha,
const double* x, int incx,
double *y, int incy )
{
return cblas_Daxpy( n, alpha, x, incx, y, incy );
cblas_daxpy( n, alpha, x, incx, y, incy );
}
inline blasStatus_t
blasGdot( blasHandle_t int n,
const float *x, int incx,
const float *y, int incy,
float *result )
inline float blasGdot( int n, const float* x, int incx,
const float* y, int incy )
{
return cblas_Sdot( n, x, incx, y, incy, result );
return cblas_sdot( n, x, incx, y, incy );
}
inline blasStatus_t
blasGdot( blasHandle_t int n,
const double *x, int incx,
const double *y, int incy,
double *result )
inline double blasGdot( int n, const double* x, int incx,
const double* y, int incy )
{
return cblas_Ddot( n, x, incx, y, incy, result );
return cblas_ddot( n, x, incx, y, incy );
}
inline blasStatus_t
blasGnrm2( blasHandle_t int n,
const float *x, int incx, float *result )
inline float blasGnrm2( int n, const float* x, int incx )
{
return cblas_Snrm2( n, x, incx, result );
return cblas_snrm2( n, x, incx );
}
inline blasStatus_t
blasGnrm2( blasHandle_t int n,
const double *x, int incx, double *result )
inline double blasGnrm2( int n, const double* x, int incx )
{
return cblas_Dnrm2( n, x, incx, result );
return cblas_dnrm2( n, x, incx );
}
inline blasStatus_t
blasGscal( blasHandle_t int n,
const float *alpha,
float *x, int incx )
inline void blasGscal( int n, const float alpha,
float* x, int incx )
{
return cblas_Sscal( n, alpha, x, incx );
cblas_sscal( n, alpha, x, incx );
}
inline blasStatus_t
blasGscal( blasHandle_t int n,
const double *alpha,
double *x, int incx )
inline void blasGscal( int n, const double alpha,
double* x, int incx )
{
return cblas_Dscal( n, alpha, x, incx );
cblas_dscal( n, alpha, x, incx );
}
#endif
#endif
......@@ -18,7 +18,9 @@
#include <TNL/Containers/Vector.h>
#ifdef HAVE_BLAS
#include "blasWrappers.h"
#endif
#ifdef HAVE_CUDA
#include "cublasWrappers.h"
......@@ -87,10 +89,10 @@ benchmarkVectorOperations( Benchmark & benchmark,
reset2();
};
reset12();
////
// Max
auto maxHost = [&]() {
resultHost = hostVector.max();
};
......@@ -112,7 +114,8 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "GPU ET", maxCudaET );
#endif
////
// Min
auto minHost = [&]() {
resultHost = hostVector.min();
};
......@@ -133,7 +136,8 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "GPU", minCudaET );
#endif
////
// Absmax
auto absMaxHost = [&]() {
resultHost = hostVector.absMax();
};
......@@ -146,6 +150,12 @@ benchmarkVectorOperations( Benchmark & benchmark,
auto absMaxCudaET = [&]() {
resultDevice = max( abs( deviceView ) );
};
#ifdef HAVE_BLAS
auto absMaxBlas = [&]() {
int index = blasIgamax( size, hostVector.getData(), 1 );
resultHost = hostVector.getElement( index );
};
#endif
#ifdef HAVE_CUDA
auto absMaxCublas = [&]() {
int index = 0;
......@@ -158,13 +168,15 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.setOperation( "absMax", datasetSize );
benchmark.time< Devices::Host >( reset1, "CPU", absMaxHost );
benchmark.time< Devices::Host >( reset1, "CPU ET", absMaxHostET );
benchmark.time< Devices::Host >( reset1, "BLAS", absMaxBlas );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset1, "GPU", absMaxCuda );
benchmark.time< Devices::Cuda >( reset1, "GPU ET", absMaxCudaET );
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMaxCublas );
#endif
////
// Absmin
auto absMinHost = [&]() {
resultHost = hostVector.absMin();
};
......@@ -177,6 +189,12 @@ benchmarkVectorOperations( Benchmark & benchmark,
auto absMinCudaET = [&]() {
resultDevice = min( abs( deviceView ) );
};
/*#ifdef HAVE_BLAS
auto absMinBlas = [&]() {
int index = blasIgamin( size, hostVector.getData(), 1 );
resultHost = hostVector.getElement( index );
};
#endif*/
#ifdef HAVE_CUDA
auto absMinCublas = [&]() {
int index = 0;
......@@ -188,14 +206,16 @@ benchmarkVectorOperations( Benchmark & benchmark,
#endif
benchmark.setOperation( "absMin", datasetSize );
benchmark.time< Devices::Host >( reset1, "CPU", absMinHost );
benchmark.time< Devices::Host >( reset1, "CPU", absMinHostET );
benchmark.time< Devices::Host >( reset1, "CPU ET", absMinHostET );
//benchmark.time< Devices::Host >( reset1, "BLAS", absMinBlas );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCuda );
benchmark.time< Devices::Cuda >( reset1, "GPU", absMinCudaET );
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", absMinCublas );
#endif
////
// Sum
auto sumHost = [&]() {
resultHost = hostVector.sum();
};
......@@ -216,7 +236,8 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "GPU", sumCudaET );
#endif
////
// L1 norm
auto l1normHost = [&]() {
resultHost = hostVector.lpNorm( 1.0 );
};
......@@ -245,7 +266,8 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l1normCublas );
#endif
////
// L2 norm
auto l2normHost = [&]() {
resultHost = hostVector.lpNorm( 2.0 );
};
......@@ -258,6 +280,11 @@ benchmarkVectorOperations( Benchmark & benchmark,
auto l2normCudaET = [&]() {
resultDevice = lpNorm( deviceView, 2.0 );
};
#ifdef HAVE_BLAS
auto l2normBlas = [&]() {
resultHost = blasGnrm2( size, hostVector.getData(), 1 );
};
#endif
#ifdef HAVE_CUDA
auto l2normCublas = [&]() {
cublasGnrm2( cublasHandle, size,
......@@ -268,13 +295,15 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.setOperation( "l2 norm", datasetSize );
benchmark.time< Devices::Host >( reset1, "CPU", l2normHost );
benchmark.time< Devices::Host >( reset1, "CPU", l2normHostET );
benchmark.time< Devices::Host >( reset1, "BLAS", l2normBlas );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCuda );
benchmark.time< Devices::Cuda >( reset1, "GPU", l2normCudaET );
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", l2normCublas );
#endif
////
// L3 norm
auto l3normHost = [&]() {
resultHost = hostVector.lpNorm( 3.0 );
};
......@@ -296,13 +325,26 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "GPU", l3normCudaET );
#endif
////
// Scalar product
auto scalarProductHost = [&]() {
resultHost = hostVector.scalarProduct( hostVector2 );
};
auto scalarProductCuda = [&]() {
resultDevice = deviceVector.scalarProduct( deviceVector2 );
};
auto scalarProductHostET = [&]() {
resultHost = ( hostView, hostView2 );
};
auto scalarProductCudaET = [&]() {
resultDevice = ( deviceView, deviceView2 );
};
#ifdef HAVE_BLAS
auto scalarProductBlas = [&]() {
resultHost = blasGdot( size, hostVector.getData(), 1, hostVector2.getData(), 1 );
};
#endif
#ifdef HAVE_CUDA
auto scalarProductCublas = [&]() {
cublasGdot( cublasHandle, size,
......@@ -313,11 +355,16 @@ benchmarkVectorOperations( Benchmark & benchmark,
#endif
benchmark.setOperation( "scalar product", 2 * datasetSize );
benchmark.time< Devices::Host >( reset1, "CPU", scalarProductHost );
benchmark.time< Devices::Host >( reset1, "CPU ET", scalarProductHostET );
benchmark.time< Devices::Host >( reset1, "BLAS", scalarProductBlas );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset1, "GPU", scalarProductCuda );
benchmark.time< Devices::Cuda >( reset1, "GPU ET", scalarProductCudaET );
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", scalarProductCublas );
#endif
////
// Prefix sum
/*
std::cout << "Benchmarking prefix-sum:" << std::endl;
timer.reset();
......@@ -348,6 +395,8 @@ benchmarkVectorOperations( Benchmark & benchmark,
*/
////
// Scalar multiplication
auto multiplyHost = [&]() {
hostVector *= 0.5;
};
......@@ -369,13 +418,28 @@ benchmarkVectorOperations( Benchmark & benchmark,
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", multiplyCublas );
#endif
////
// Vector addition
auto addVectorHost = [&]() {
hostVector.addVector( hostVector2 );
};
auto addVectorCuda = [&]() {
deviceVector.addVector( deviceVector2 );
};
auto addVectorHostET = [&]() {
hostView += hostView2;
};
auto addVectorCudaET = [&]() {
deviceView += deviceView2;
};
#ifdef HAVE_CUDA
auto addVectorBlas = [&]() {
const Real alpha = 1.0;
blasGaxpy( size, alpha,
deviceVector2.getData(), 1,
deviceVector.getData(), 1 );
};
#endif
#ifdef HAVE_CUDA
auto addVectorCublas = [&]() {
const Real alpha = 1.0;
......@@ -387,8 +451,11 @@ benchmarkVectorOperations( Benchmark & benchmark,
#endif
benchmark.setOperation( "vector addition", 3 * datasetSize );
benchmark.time< Devices::Host >( reset1, "CPU", addVectorHost );
benchmark.time< Devices::Host >( reset1, "CPU ET", addVectorHostET );
benchmark.time< Devices::Host >( reset1, "BLAS", addVectorBlas );
#ifdef HAVE_CUDA
benchmark.time< Devices::Cuda >( reset1, "GPU", addVectorCuda );
benchmark.time< Devices::Cuda >( reset1, "GPU ET", addVectorCudaET );
benchmark.time< Devices::Cuda >( reset1, "cuBLAS", addVectorCublas );
#endif
......
......@@ -24,7 +24,6 @@ namespace TNL {
////
// Addition
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Addition >
operator+( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -32,7 +31,6 @@ operator+( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Addition >
operator+( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -40,7 +38,6 @@ operator+( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Addition >
operator+( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -50,7 +47,6 @@ operator+( const Containers::VectorView< Real1, Device, Index >& a, const Contai
////
// Subtraction
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Subtraction >
operator-( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -58,7 +54,6 @@ operator-( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Subtraction >
operator-( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -66,7 +61,6 @@ operator-( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Subtraction >
operator-( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -76,7 +70,6 @@ operator-( const Containers::VectorView< Real1, Device, Index >& a, const Contai
////
// Multiplication
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Multiplication >
operator*( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -84,7 +77,6 @@ operator*( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >
operator*( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -92,7 +84,6 @@ operator*( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >
operator*( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -102,7 +93,6 @@ operator*( const Containers::VectorView< Real1, Device, Index >& a, const Contai
////
// Division
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Division >
operator/( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -110,7 +100,6 @@ operator/( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Division >
operator/( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -118,7 +107,6 @@ operator/( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Division >
operator/( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -128,7 +116,6 @@ operator/( const Containers::VectorView< Real1, Device, Index >& a, const Contai
////
// Min
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Min >
min( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -136,7 +123,6 @@ min( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Min >
min( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -144,7 +130,6 @@ min( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Min >
min( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -154,7 +139,6 @@ min( const Containers::VectorView< Real1, Device, Index >& a, const Containers::
////
// Max
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, ET, Containers::Expressions::Max >
max( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -162,7 +146,6 @@ max( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Max >
max( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -170,7 +153,6 @@ max( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Max >
max( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......@@ -180,21 +162,18 @@ max( const Containers::VectorView< Real1, Device, Index >& a, const Containers::
////
// Comparison operations - operator ==
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator==( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonEQ( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator==( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonEQ( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator==( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonEQ( a, b );
......@@ -203,21 +182,18 @@ bool operator==( const Containers::VectorView< Real1, Device, Index >& a, const
////
// Comparison operations - operator !=
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator!=( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonNE( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator!=( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonNE( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator!=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonNE( a, b );
......@@ -226,21 +202,18 @@ bool operator!=( const Containers::VectorView< Real1, Device, Index >& a, const
////
// Comparison operations - operator <
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator<( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonLT( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator<( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonLT( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator<( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonLT( a, b );
......@@ -249,21 +222,18 @@ bool operator<( const Containers::VectorView< Real1, Device, Index >& a, const C
////
// Comparison operations - operator <=
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator<=( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonLE( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator<=( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonLE( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator<=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonLE( a, b );
......@@ -272,21 +242,18 @@ bool operator<=( const Containers::VectorView< Real1, Device, Index >& a, const
////
// Comparison operations - operator >
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator>( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonGT( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator>( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonGT( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator>( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonGT( a, b );
......@@ -295,21 +262,18 @@ bool operator>( const Containers::VectorView< Real1, Device, Index >& a, const C
////
// Comparison operations - operator >=
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
bool operator>=( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return Containers::Expressions::ComparisonGE( a, b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
bool operator>=( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return Containers::Expressions::ComparisonGE( a, b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
bool operator>=( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
return Containers::Expressions::ComparisonGE( a, b );
......@@ -318,7 +282,6 @@ bool operator>=( const Containers::VectorView< Real1, Device, Index >& a, const
////
// Minus
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Minus >
operator-( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -328,7 +291,6 @@ operator-( const Containers::VectorView< Real, Device, Index >& a )
////
// Abs
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Abs >
abs( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -338,7 +300,6 @@ abs( const Containers::VectorView< Real, Device, Index >& a )
////
// Sine
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sin >
sin( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -348,7 +309,6 @@ sin( const Containers::VectorView< Real, Device, Index >& a )
////
// Cosine
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cos >
cos( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -358,7 +318,6 @@ cos( const Containers::VectorView< Real, Device, Index >& a )
////
// Tangent
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tan >
tan( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -368,7 +327,6 @@ tan( const Containers::VectorView< Real, Device, Index >& a )
////
// Sqrt
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sqrt >
sqrt( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -378,7 +336,6 @@ sqrt( const Containers::VectorView< Real, Device, Index >& a )
////
// Cbrt
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cbrt >
cbrt( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -388,7 +345,6 @@ cbrt( const Containers::VectorView< Real, Device, Index >& a )
////
// Power
template< typename Real, typename Device, typename Index, typename ExpType >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Pow, ExpType >
pow( const Containers::VectorView< Real, Device, Index >& a, const ExpType& exp )
{
......@@ -398,7 +354,6 @@ pow( const Containers::VectorView< Real, Device, Index >& a, const ExpType& exp
////
// Floor
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Floor >
floor( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -408,7 +363,6 @@ floor( const Containers::VectorView< Real, Device, Index >& a )
////
// Ceil
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Ceil >
ceil( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -418,7 +372,6 @@ ceil( const Containers::VectorView< Real, Device, Index >& a )
////
// Acos
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Acos >
acos( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -428,7 +381,6 @@ acos( const Containers::VectorView< Real, Device, Index >& a )
////
// Asin
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Asin >
asin( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -438,7 +390,6 @@ asin( const Containers::VectorView< Real, Device, Index >& a )
////
// Atan
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Atan >
atan( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -448,7 +399,6 @@ atan( const Containers::VectorView< Real, Device, Index >& a )
////
// Cosh
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Cosh >
cosh( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -458,7 +408,6 @@ cosh( const Containers::VectorView< Real, Device, Index >& a )
////
// Tanh
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Tanh >
tanh( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -468,7 +417,6 @@ tanh( const Containers::VectorView< Real, Device, Index >& a )
////
// Log
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log >
log( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -478,7 +426,6 @@ log( const Containers::VectorView< Real, Device, Index >& a )
////
// Log10
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log10 >
log10( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -488,7 +435,6 @@ log10( const Containers::VectorView< Real, Device, Index >& a )
////
// Log2
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Log2 >
log2( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -498,7 +444,6 @@ log2( const Containers::VectorView< Real, Device, Index >& a )
////
// Exp
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Exp >
exp( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -508,7 +453,6 @@ exp( const Containers::VectorView< Real, Device, Index >& a )
////
// Sign
template< typename Real, typename Device, typename Index >
__cuda_callable__
const Containers::Expressions::UnaryExpressionTemplate< Containers::VectorView< Real, Device, Index >, Containers::Expressions::Sign >
sign( const Containers::VectorView< Real, Device, Index >& a )
{
......@@ -602,21 +546,18 @@ binaryAnd( const Containers::VectorView< Real, Device, Index >& a )
////
// Scalar product
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
Real operator,( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
return TNL::sum( a * b );
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
Real operator,( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
return TNL::sum( a * b );
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
auto operator,( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
->decltype( TNL::sum( a * b ) )
{
......@@ -626,7 +567,6 @@ auto operator,( const Containers::VectorView< Real1, Device, Index >& a, const C
////
// TODO: Replace this with multiplication when its safe
template< typename Real, typename Device, typename Index, typename ET >
__cuda_callable__
Containers::VectorView< Real, Device, Index >
Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
{
......@@ -635,7 +575,6 @@ Scale( const Containers::VectorView< Real, Device, Index >& a, const ET& b )
}
template< typename ET, typename Real, typename Device, typename Index >
__cuda_callable__
Containers::Expressions::BinaryExpressionTemplate< ET, Containers::VectorView< Real, Device, Index >, Containers::Expressions::Multiplication >
Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
{
......@@ -644,7 +583,6 @@ Scale( const ET& a, const Containers::VectorView< Real, Device, Index >& b )
}
template< typename Real1, typename Real2, typename Device, typename Index >
__cuda_callable__
Containers::Expressions::BinaryExpressionTemplate< Containers::VectorView< Real1, Device, Index >, Containers::VectorView< Real2, Device, Index >, Containers::Expressions::Multiplication >
Scale( const Containers::VectorView< Real1, Device, Index >& a, const Containers::VectorView< Real2, Device, Index >& b )
{
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment