Revised assert macros (cb838ebd) · Commits · TNL / tnl-dev

src/TNL/Assert.h

+239 −8

Original line number	Diff line number	Diff line
		@@ -10,26 +10,246 @@

		#pragma once

		#include <TNL/Devices/CudaCallable.h>

		/****
		* Debugging assert
		*/

		#ifndef NDEBUG

		#include <sstream>
		#include <iostream>
		#include <stdlib.h>
		#include <assert.h>
		#include <stdio.h>

		namespace TNL {
		namespace Assert {

		// Writes a report about a failed assertion to std::cerr (host code only).
		//
		// Parameters:
		//   assertion   - textual form of the failed assertion
		//   message     - user-supplied explanation of the assertion
		//   file        - name of the source file containing the assertion
		//   function    - name of the enclosing function
		//   line        - line number of the assertion
		//   diagnostics - additional, already formatted details; printed on
		//                 the line(s) following the "Diagnostics:" header
		//
		// The stream is flushed at the end of the report.
		inline void
		printDiagnosticsHost( const char* assertion,
		                      const char* message,
		                      const char* file,
		                      const char* function,
		                      int line,
		                      const char* diagnostics )
		{
		   std::ostream& report = std::cerr;
		   report << "Assertion '" << assertion << "' failed !!!\n";
		   report << "Message: " << message << "\n";
		   report << "File: " << file << "\n";
		   report << "Function: " << function << "\n";
		   report << "Line: " << line << "\n";
		   report << "Diagnostics:\n" << diagnostics;
		   // std::endl terminates the report and flushes the stream
		   report << std::endl;
		}

		// Prints a report about a failed assertion using printf, so that it
		// does not depend on iostreams (the function is marked
		// __cuda_callable__).
		//
		// Parameters have the same meaning as in printDiagnosticsHost. Unlike
		// the host variant, the diagnostics text is printed on the same line
		// as the "Diagnostics:" header.
		__cuda_callable__
		inline void
		printDiagnosticsCuda( const char* assertion,
		                      const char* message,
		                      const char* file,
		                      const char* function,
		                      int line,
		                      const char* diagnostics )
		{
		   // the whole report is emitted by a single printf call
		   printf( "Assertion '%s' failed !!!\nMessage: %s\nFile: %s\nFunction: %s\nLine: %d\nDiagnostics: %s\n",
		           assertion,
		           message,
		           file,
		           function,
		           line,
		           diagnostics );
		}

		// Terminates the execution after a failed assertion.
		//
		// In host code (no __CUDA_ARCH__) the integer EXIT_FAILURE is thrown as
		// an exception. In device code the "trap" instruction is executed.
		__cuda_callable__
		inline void
		fatalFailure()
		{
		#ifndef __CUDA_ARCH__
		   // host path: signal the failure by throwing an int
		   throw EXIT_FAILURE;
		#else
		   // device path:
		   // https://devtalk.nvidia.com/default/topic/509584/how-to-cancel-a-running-cuda-kernel-/
		   // TODO: it is reported as "illegal instruction", but that leads to an abort as well...
		   asm("trap;");
		#endif
		}

		#ifndef NDEBUG
		// Converts a value to its textual form by streaming it with operator<<
		// into a string stream and returning the accumulated text.
		template< typename T >
		std::string
		printToString( const T& value )
		{
		   std::ostringstream buffer;
		   buffer << value;
		   return buffer.str();
		}

		// Specialization for bool: yields the words "true" / "false" instead of
		// the numeric form produced by the default stream formatting.
		template<>
		inline std::string
		printToString( const bool& value )
		{
		   return value ? "true" : "false";
		}

		// Reports a failed comparison assertion and terminates the execution
		// by calling fatalFailure().
		//
		// Parameters:
		//   assertion, message, file, function, line
		//                   - passed unchanged to the diagnostics printer
		//   lhs_expression  - source text of the left operand
		//   rhs_expression  - source text of the right operand
		//   lhs_value       - evaluated left operand
		//   rhs_value       - evaluated right operand
		//   op              - textual form of the comparison operator ("==", "<", ...)
		//
		// In device code (__CUDA_ARCH__ defined) only a fixed diagnostics text
		// is printed. In host code a detailed message is composed; T1 and T2
		// must then be printable with operator<<.
		template< typename T1, typename T2 >
		__cuda_callable__ void
		cmpHelperOpFailure( const char* assertion,
		                    const char* message,
		                    const char* file,
		                    const char* function,
		                    int line,
		                    const char* lhs_expression,
		                    const char* rhs_expression,
		                    const T1& lhs_value,
		                    const T2& rhs_value,
		                    const char* op )
		{
		#ifdef __CUDA_ARCH__
		   // diagnostics is not supported - we don't have the machinery
		   // to construct the dynamic error message
		   printDiagnosticsCuda( assertion, message, file, function, line,
		                         "Not supported in CUDA kernels." );
		#else
		   // Format each operand exactly once and use the same text both for
		   // the "is the value different from its expression?" test and for the
		   // output. Streaming the raw value instead would print a bool as 0/1
		   // although it was compared as "true"/"false".
		   const std::string lhs_text = printToString( lhs_value );
		   const std::string rhs_text = printToString( rhs_value );

		   std::stringstream str;
		   if( std::string(op) == "==" ) {
		      str << " Expected: " << lhs_expression;
		      // the value is printed only if it adds information to the expression
		      if( lhs_text != lhs_expression ) {
		         str << "\n Which is: " << lhs_text;
		      }
		      str << "\nTo be equal to: " << rhs_expression;
		      if( rhs_text != rhs_expression ) {
		         str << "\n Which is: " << rhs_text;
		      }
		      str << std::endl;
		   }
		   else {
		      str << "Expected: (" << lhs_expression << ") " << op << " (" << rhs_expression << "), "
		          << "actual: " << lhs_text << " vs " << rhs_text << std::endl;
		   }
		   printDiagnosticsHost( assertion, message, file, function, line,
		                         str.str().c_str() );
		#endif
		   fatalFailure();
		}

		// Helper behind TNL_ASSERT_TRUE: does nothing when val1 converts to
		// true, otherwise reports the failure through cmpHelperOpFailure as a
		// failed comparison `expr1 == true`.
		//
		// expr2 and val2 are accepted only to keep the signature uniform with
		// the other cmpHelper* functions; they are not used.
		template< typename T1, typename T2 >
		__cuda_callable__ void
		cmpHelperTrue( const char* assertion,
		               const char* message,
		               const char* file,
		               const char* function,
		               int line,
		               const char* expr1,
		               const char* expr2,
		               const T1& val1,
		               const T2& val2 )
		{
		   // explicit conversion is necessary, because T1::operator! might not be defined
		   const bool holds = static_cast< bool >( val1 );
		   if( holds )
		      return;

		   ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line,
		                                      expr1, "true", val1, true, "==" );
		}

		// Helper behind TNL_ASSERT_FALSE: does nothing when val1 converts to
		// false, otherwise reports the failure through cmpHelperOpFailure as a
		// failed comparison `expr1 == false`.
		//
		// expr2 and val2 are accepted only to keep the signature uniform with
		// the other cmpHelper* functions; they are not used.
		template< typename T1, typename T2 >
		__cuda_callable__ void
		cmpHelperFalse( const char* assertion,
		                const char* message,
		                const char* file,
		                const char* function,
		                int line,
		                const char* expr1,
		                const char* expr2,
		                const T1& val1,
		                const T2& val2 )
		{
		   const bool holds = static_cast< bool >( val1 );
		   if( ! holds )
		      return;

		   ::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line,
		                                      expr1, "false", val1, false, "==" );
		}

		// A macro for implementing the helper functions needed to implement
		// TNL_ASSERT_??. It is here just to avoid copy-and-paste of similar code.
		//
		// TNL_IMPL_CMP_HELPER_( op_name, op ) defines a function template named
		// cmpHelper<op_name> (the name is built by token pasting). The generated
		// function evaluates `(val1) op (val2)` and, if the negation of the result
		// is true, forwards all its arguments together with the stringified
		// operator (#op) to ::TNL::Assert::cmpHelperOpFailure. Otherwise it does
		// nothing.
		#define TNL_IMPL_CMP_HELPER_( op_name, op ) \
		template< typename T1, typename T2 > \
		__cuda_callable__ void \
		cmpHelper##op_name( const char* assertion, \
		const char* message, \
		const char* file, \
		const char* function, \
		int line, \
		const char* expr1, \
		const char* expr2, \
		const T1& val1, \
		const T2& val2 ) \
		{\
		if( ! ( (val1) op (val2) ) ) \
		::TNL::Assert::cmpHelperOpFailure( assertion, message, file, function, line, \
		expr1, expr2, val1, val2, #op );\
		}

		// Implements the helper function for TNL_ASSERT_EQ
		TNL_IMPL_CMP_HELPER_( EQ, == );
		// Implements the helper function for TNL_ASSERT_NE
		TNL_IMPL_CMP_HELPER_( NE, != );
		// Implements the helper function for TNL_ASSERT_LE
		TNL_IMPL_CMP_HELPER_( LE, <= );
		// Implements the helper function for TNL_ASSERT_LT
		TNL_IMPL_CMP_HELPER_( LT, < );
		// Implements the helper function for TNL_ASSERT_GE
		TNL_IMPL_CMP_HELPER_( GE, >= );
		// Implements the helper function for TNL_ASSERT_GT
		TNL_IMPL_CMP_HELPER_( GT, > );

		// The generator macro is an implementation detail; it is removed again
		// once all six helpers have been defined.
		#undef TNL_IMPL_CMP_HELPER_

		} // namespace Assert
		} // namespace TNL

		// Internal macro wrapping the __PRETTY_FUNCTION__ "magic".
		//
		// With nvcc older than CUDA 8 a fixed placeholder text is used instead of
		// __PRETTY_FUNCTION__. The compiler version is tested with
		// __CUDACC_VER_MAJOR__, because __CUDACC_VER__ is rejected by nvcc since
		// CUDA 9.0 and would break this #if. If __CUDACC_VER_MAJOR__ is not defined
		// at all, it evaluates to 0 here, which selects the placeholder as well.
		//
		// Both spellings of the macro are defined, because both are referenced
		// by the assertion macros.
		#if defined( __NVCC__ ) && ( __CUDACC_VER_MAJOR__ < 8 )
		#define TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)"
		#define __TNL_PRETTY_FUNCTION "(not known in CUDA 7.5 or older)"
		#else
		#define TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__
		#define __TNL_PRETTY_FUNCTION __PRETTY_FUNCTION__
		#endif

		// Internal macro to compose the string representing the assertion.
		// We can't do it easily at runtime, because we have to support assertions
		// in CUDA kernels, which can't use std::string objects. Instead, we do it
		// at compile time - adjacent strings are joined at the language level.
		//
		// The result is a single string literal of the form "val1 op val2", with
		// the three parts separated by one space.
		// NOTE(review): __STRING is not defined in this file; presumably it is
		// provided by a system header - confirm that it is available on all
		// supported platforms.
		#define __TNL_JOIN_STRINGS( val1, op, val2 ) \
		__STRING( val1 ) " " __STRING( op ) " " __STRING( val2 )

		// Internal macro to pass all the arguments to the specified cmpHelperOP
		//
		// Expands to a single call expression `pred( ... )` without a trailing
		// semicolon. The arguments are, in order: the composed assertion string,
		// the message, __FILE__, the function name, __LINE__, the stringified
		// operands (#val1, #val2) and finally the operands themselves. Each
		// operand appears exactly once as an evaluated expression, so it is
		// evaluated once per assertion.
		#define __TNL_ASSERT_PRED2( pred, op, val1, val2, msg ) \
		pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \
		msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \
		#val1, #val2, val1, val2 )

		// Main definitions of the TNL_ASSERT_* macros
		//
		// Every macro takes the checked value(s) and a message describing the
		// assertion, and expands to a call of the matching
		// ::TNL::Assert::cmpHelper* function. The expansion has no trailing
		// semicolon - it is supplied at the place of use.
		//
		// unary
		// (the constant true/false is passed as the second operand only to fit
		// the common two-operand form)
		#define TNL_ASSERT_TRUE( val, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperTrue, ==, val, true, msg )
		#define TNL_ASSERT_FALSE( val, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperFalse, ==, val, false, msg )
		// binary
		// (the assertion holds when `val1 <operator> val2` is true)
		#define TNL_ASSERT_EQ( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperEQ, ==, val1, val2, msg )
		#define TNL_ASSERT_NE( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperNE, !=, val1, val2, msg )
		#define TNL_ASSERT_LE( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperLE, <=, val1, val2, msg )
		#define TNL_ASSERT_LT( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperLT, <, val1, val2, msg )
		#define TNL_ASSERT_GE( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperGE, >=, val1, val2, msg )
		#define TNL_ASSERT_GT( val1, val2, msg ) \
		__TNL_ASSERT_PRED2( ::TNL::Assert::cmpHelperGT, >, val1, val2, msg )




		/****
		* Original assert macro with custom command for diagnostic.
		*/

		// __CUDA_ARCH__ is defined by the compiler only for code executed on GPU
		#ifdef __CUDA_ARCH__
		#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command ) \
		@@ -39,7 +259,7 @@
		__STRING( ___tnl__assert_condition ), \
		__FILE__, \
		__LINE__ ); \
		\
		asm("trap;"); \
		}

		#else // __CUDA_ARCH__
		@@ -48,7 +268,7 @@
		{ \
		std::cerr << "Assertion '" << __STRING( ___tnl__assert_condition ) << "' failed !!!" << std::endl \
		<< "File: " << __FILE__ << std::endl \
		<< "Function: " << TNL_PRETTY_FUNCTION << std::endl \
		<< "Function: " << __TNL_PRETTY_FUNCTION << std::endl \
		<< "Line: " << __LINE__ << std::endl \
		<< "Diagnostics: "; \
		___tnl__assert_command; \
		@@ -57,5 +277,16 @@
		#endif // __CUDA_ARCH__

		#else /* #ifndef NDEBUG */

		// empty macros for optimized build
		//
		// When NDEBUG is defined, every assertion macro expands to nothing, so
		// the expressions passed to it are neither evaluated nor compiled.
		#define TNL_ASSERT_TRUE( val, msg )
		#define TNL_ASSERT_FALSE( val, msg )
		#define TNL_ASSERT_EQ( val1, val2, msg )
		#define TNL_ASSERT_NE( val1, val2, msg )
		#define TNL_ASSERT_LE( val1, val2, msg )
		#define TNL_ASSERT_LT( val1, val2, msg )
		#define TNL_ASSERT_GE( val1, val2, msg )
		#define TNL_ASSERT_GT( val1, val2, msg )
		#define TNL_ASSERT( ___tnl__assert_condition, ___tnl__assert_command )

		#endif /* #ifndef NDEBUG */

src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h

+21 −21

Original line number	Diff line number	Diff line
		@@ -49,7 +49,7 @@ bool
		ArrayOperations< Devices::Cuda >::
		freeMemory( Element* data )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to free a nullptr." );
		#ifdef HAVE_CUDA
		TNL_CHECK_CUDA_DEVICE;
		cudaFree( data );
		@@ -65,7 +65,7 @@ ArrayOperations< Devices::Cuda >::
		setMemoryElement( Element* data,
		const Element& value )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
		ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 );
		}

		@@ -74,7 +74,7 @@ Element
		ArrayOperations< Devices::Cuda >::
		getMemoryElement( const Element* data )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." );
		Element result;
		ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 );
		return result;
		@@ -85,7 +85,7 @@ Element&
		ArrayOperations< Devices::Cuda >::
		getArrayElementReference( Element* data, const Index i )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to access data through a nullptr." );
		return data[ i ];
		}

		@@ -94,7 +94,7 @@ const
		Element& ArrayOperations< Devices::Cuda >::
		getArrayElementReference( const Element* data, const Index i )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to access data through a nullptr." );
		return data[ i ];
		}

		@@ -123,7 +123,7 @@ setMemory( Element* data,
		const Element& value,
		const Index size )
		{
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." );
		#ifdef HAVE_CUDA
		dim3 blockSize( 0 ), gridSize( 0 );
		blockSize. x = 256;
		@@ -164,8 +164,8 @@ copyMemory( DestinationElement* destination,
		const SourceElement* source,
		const Index size )
		{
		TNL_ASSERT( destination, );
		TNL_ASSERT( source, );
		TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
		#ifdef HAVE_CUDA
		if( std::is_same< DestinationElement, SourceElement >::value )
		{
		@@ -198,8 +198,8 @@ compareMemory( const Element1* destination,
		const Element2* source,
		const Index size )
		{
		TNL_ASSERT( destination, );
		TNL_ASSERT( source, );
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
		//TODO: The parallel reduction on the CUDA device with different element types is needed.
		bool result;
		Algorithms::tnlParallelReductionEqualities< Element1, Index > reductionEqualities;
		@@ -220,8 +220,8 @@ copyMemory( DestinationElement* destination,
		const SourceElement* source,
		const Index size )
		{
		TNL_ASSERT( destination, );
		TNL_ASSERT( source, );
		TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
		#ifdef HAVE_CUDA
		if( std::is_same< DestinationElement, SourceElement >::value )
		{
		@@ -276,9 +276,9 @@ compareMemory( const Element1* destination,
		/***
		* Here, destination is on host and source is on CUDA device.
		*/
		TNL_ASSERT( destination, );
		TNL_ASSERT( source, );
		TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
		#ifdef HAVE_CUDA
		Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ];
		Index compared( 0 );
		@@ -320,9 +320,9 @@ copyMemory( DestinationElement* destination,
		const SourceElement* source,
		const Index size )
		{
		TNL_ASSERT( destination, );
		TNL_ASSERT( source, );
		TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
		TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." );
		TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
		#ifdef HAVE_CUDA
		if( std::is_same< DestinationElement, SourceElement >::value )
		{
		@@ -373,9 +373,9 @@ compareMemory( const Element1* hostData,
		const Element2* deviceData,
		const Index size )
		{
		TNL_ASSERT( hostData, );
		TNL_ASSERT( deviceData, );
		TNL_ASSERT( size >= 0, std::cerr << "size = " << size );
		TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_GE( size, 0, "Array size must be non-negative." );
		return ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory( deviceData, hostData, size );
		}

src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -30,7 +30,8 @@ allocateMemory( Element*& data,
		// According to the standard, new either throws, or returns non-nullptr.
		// Some (old) compilers don't comply:
		// https://stackoverflow.com/questions/550451/will-new-return-null-in-any-case
		TNL_ASSERT( data, );
		TNL_ASSERT_TRUE( data, "Operator 'new' returned a nullptr. This should never happen - there is "
		"either a bug or the compiler does not comply to the standard." );
		return true;
		}

src/TNL/Containers/Algorithms/Multireduction_impl.h

+4 −4

Original line number	Diff line number	Diff line
		@@ -60,8 +60,8 @@ reduce( Operation& operation,
		typename Operation::ResultType* hostResult )
		{
		#ifdef HAVE_CUDA
		TNL_ASSERT( n > 0, );
		TNL_ASSERT( size <= ldInput1, );
		TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
		TNL_ASSERT_LE( size, ldInput1, "The size of the input cannot exceed its leading dimension." );

		typedef typename Operation::IndexType IndexType;
		typedef typename Operation::RealType RealType;
		@@ -171,8 +171,8 @@ reduce( Operation& operation,
		const typename Operation::RealType* input2,
		typename Operation::ResultType* result )
		{
		TNL_ASSERT( n > 0, );
		TNL_ASSERT( size <= ldInput1, );
		TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." );
		TNL_ASSERT_LE( size, ldInput1, "The size of the input cannot exceed its leading dimension." );

		typedef typename Operation::IndexType IndexType;
		typedef typename Operation::RealType RealType;

src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h

+34 −41

Original line number	Diff line number	Diff line
		@@ -49,7 +49,7 @@ getVectorMax( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionMax< Real, Index > operation;
		@@ -69,7 +69,7 @@ getVectorMin( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionMin< Real, Index > operation;
		@@ -89,7 +89,7 @@ getVectorAbsMax( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionAbsMax< Real, Index > operation;
		@@ -109,7 +109,7 @@ getVectorAbsMin( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionAbsMin< Real, Index > operation;
		@@ -129,7 +129,7 @@ getVectorL1Norm( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionAbsSum< Real, Index > operation;
		@@ -149,7 +149,7 @@ getVectorL2Norm( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionL2Norm< Real, Index > operation;
		@@ -171,9 +171,8 @@ getVectorLpNorm( const Vector& v,
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT( p > 0.0,
		std::cerr << " p = " << p );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		if( p == 1 )
		return getVectorL1Norm( v );
		@@ -198,7 +197,7 @@ getVectorSum( const Vector& v )
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		Real result( 0 );
		Algorithms::tnlParallelReductionSum< Real, Index > operation;
		@@ -219,8 +218,8 @@ getVectorDifferenceMax( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffMax< Real, Index > operation;
		@@ -241,8 +240,8 @@ getVectorDifferenceMin( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffMin< Real, Index > operation;
		@@ -264,8 +263,8 @@ getVectorDifferenceAbsMax( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffAbsMax< Real, Index > operation;
		@@ -286,8 +285,8 @@ getVectorDifferenceAbsMin( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffAbsMin< Real, Index > operation;
		@@ -308,8 +307,8 @@ getVectorDifferenceL1Norm( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffAbsSum< Real, Index > operation;
		@@ -330,8 +329,8 @@ getVectorDifferenceL2Norm( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffL2Norm< Real, Index > operation;
		@@ -354,10 +353,9 @@ getVectorDifferenceLpNorm( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( p > 0.0,
		std::cerr << " p = " << p );
		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
		TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffLpNorm< Real, Index > operation;
		@@ -379,8 +377,8 @@ getVectorDifferenceSum( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		Algorithms::tnlParallelReductionDiffSum< Real, Index > operation;
		@@ -418,7 +416,7 @@ vectorScalarMultiplication( Vector& v,
		typedef typename Vector::RealType Real;
		typedef typename Vector::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		#ifdef HAVE_CUDA
		dim3 blockSize( 0 ), gridSize( 0 );
		@@ -445,8 +443,8 @@ getScalarProduct( const Vector1& v1,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v1.getSize() > 0, );
		TNL_ASSERT( v1.getSize() == v2.getSize(), );
		TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		Real result( 0 );
		/*#if defined HAVE_CUBLAS && defined HAVE_CUDA
		@@ -502,10 +500,8 @@ addVector( Vector1& y,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( y.getSize() > 0, );
		TNL_ASSERT( y.getSize() == x.getSize(), );
		TNL_ASSERT( y.getData() != 0, );
		TNL_ASSERT( x.getData() != 0, );
		TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." );

		#ifdef HAVE_CUDA
		dim3 blockSize( 0 ), gridSize( 0 );
		@@ -573,12 +569,9 @@ addVectors( Vector1& v,
		typedef typename Vector1::RealType Real;
		typedef typename Vector1::IndexType Index;

		TNL_ASSERT( v.getSize() > 0, );
		TNL_ASSERT( v.getSize() == v1.getSize(), );
		TNL_ASSERT( v.getSize() == v2.getSize(), );
		TNL_ASSERT( v.getData() != 0, );
		TNL_ASSERT( v1.getData() != 0, );
		TNL_ASSERT( v2.getData() != 0, );
		TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
		TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." );
		TNL_ASSERT_EQ( v.getSize(), v2.getSize(), "The vector sizes must be the same." );

		#ifdef HAVE_CUDA
		dim3 blockSize( 0 ), gridSize( 0 );