diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b7515f5f92692058f0098e28a4cc90cf2638002..620e46a82d9e227f96c98ddd4781f9841dca4b4b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) - set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) + set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() diff --git a/install b/install index 887b9029679274b890150da23df9e170d9b9b325..48296a4c2122869653ba771238c7542e936defa7 100755 --- a/install +++ b/install @@ -36,14 +36,14 @@ make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install -#cd ../Release -#${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -# -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -# -DWITH_CUDA=${WITH_CUDA} \ -# -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -# -DPETSC_DIR=${PETSC_DIR} \ -# -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} -#make -j${CPUS} ${VERBOSE} -#make -j${CPUS} test -#make -j${CPUS} install +cd ../Release +${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ + -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ + -DWITH_CUDA=${WITH_CUDA} \ + -DWITH_CUSPARSE=${WITH_CUSPARSE} \ + -DPETSC_DIR=${PETSC_DIR} \ + -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} +make -j${CPUS} ${VERBOSE} +make -j${CPUS} test +make -j${CPUS} install diff --git a/src/core/arrays/tnlMultiArray.h b/src/core/arrays/tnlMultiArray.h index 5656805c66cb9fa9193ec1fa52e46bd296523b31..252b5f9f5a3a42825186e467f475f1be9d3b8adf 100644 --- a/src/core/arrays/tnlMultiArray.h +++ b/src/core/arrays/tnlMultiArray.h @@ -38,6 +38,9 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); @@ -49,7 +52,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 1, Index >& dimensions ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void getDimensions( Index& iSize ) const; @@ -63,7 +66,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index getElementIndex( const Index i ) const; @@ -123,6 +126,9 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); @@ -134,12 +140,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 2, Index >& dimensions ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void getDimensions( Index& jSize, Index& iSize ) const; #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 2, Index >& getDimensions() const; @@ -148,7 +154,7 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index getElementIndex( const Index j, const Index i ) const; @@ -167,12 +173,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usually). */ #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& operator()( const Index j, const Index i ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& operator()( const Index j, const Index i ) const; @@ -212,6 +218,9 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); @@ -223,12 +232,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 3, Index >& dimensions ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void getDimensions( Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 3, Index >& getDimensions() const; @@ -237,7 +246,7 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index getElementIndex( const Index k, const Index j, const Index i ) const; @@ -256,12 +265,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& operator()( const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& operator()( const Index k, const Index j, const Index i ) const; @@ -301,6 +310,9 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); @@ -312,12 +324,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 4, Index >& dimensions ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 4, Index >& getDimensions() const; @@ -326,7 +338,7 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA - __device__ __host__ + // __device__ __host__ #endif Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const; @@ -345,12 +357,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& operator()( const Index l, const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const; @@ -417,10 +429,8 @@ extern template class tnlMultiArray< 4, double, tnlHost, int >; extern template class tnlMultiArray< 4, float, tnlHost, long int >; extern template class tnlMultiArray< 4, double, tnlHost, long int >; -#ifdef HAVE_CUDA -#endif - -extern template class tnlMultiArray< 1, float, tnlCuda, int >; +// TODO: There are problems with nvlink - it maght be better in later versions +/*extern template class tnlMultiArray< 1, float, tnlCuda, int >; extern template class tnlMultiArray< 1, double, tnlCuda, int >; extern template class tnlMultiArray< 1, float, tnlCuda, long int >; extern template class tnlMultiArray< 1, double, tnlCuda, long int >; @@ -435,7 +445,7 @@ extern template class tnlMultiArray< 3, double, tnlCuda, long int >; extern template class tnlMultiArray< 4, float, tnlCuda, int >; extern template class tnlMultiArray< 4, double, tnlCuda, int >; extern template class tnlMultiArray< 4, float, tnlCuda, long int >; -extern template class tnlMultiArray< 4, double, tnlCuda, long int >; +extern template class tnlMultiArray< 4, double, tnlCuda, long int >;*/ #endif diff --git a/src/core/tnlTuple.h b/src/core/tnlTuple.h index b971634bf9d0e863775029d3a4f7bfd1434a5560..c3137d79cdf086bb94d1afa10e4ac0498da4cc36 100644 --- a/src/core/tnlTuple.h +++ b/src/core/tnlTuple.h @@ -33,87 +33,171 @@ class tnlTuple public: typedef Real RealType; +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple(); +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple( const Real v[ Size ] ); //! This sets all vector components to v +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple( const Real& v ); //! Copy constructor +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple( const tnlTuple< Size, Real >& v ); //! This is constructore of vector with Size = 2. +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple( const Real& v1, const Real& v2 ); - //! This is constructore of vector with Size = 3. + //! This is constructore of vector with Size = 3 +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple( const Real& v1, const Real& v2, const Real& v3 ); static tnlString getType(); +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& operator[]( int i ) const; +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& operator[]( int i ); //! Returns the first coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& x(); //! Returns the first coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& x() const; //! Returns the second coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& y(); //! Returns the second coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& y() const; //! Returns the third coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& z(); //! Returns the third coordinate +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& z() const; //! Adding operator +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple& operator += ( const tnlTuple& v ); //! Subtracting operator +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple& operator -= ( const tnlTuple& v ); //! Multiplication with number +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple& operator *= ( const Real& c ); //! Adding operator +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple operator + ( const tnlTuple& u ) const; //! Subtracting operator +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple operator - ( const tnlTuple& u ) const; //! Multiplication with number +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple operator * ( const Real& c ) const; //! +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple& operator = ( const tnlTuple& v ); //! Scalar product +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real operator * ( const tnlTuple& u ) const; //! Comparison operator template< typename Real2 > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator == ( const tnlTuple< Size, Real2 >& v ) const; //! Comparison operator template< typename Real2 > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator != ( const tnlTuple< Size, Real2 >& v ) const; +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator < ( const tnlTuple& v ) const; +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator <= ( const tnlTuple& v ) const; +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator > ( const tnlTuple& v ) const; +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool operator >= ( const tnlTuple& v ) const; bool save( tnlFile& file ) const; @@ -132,12 +216,18 @@ template< int Size, typename Real > ostream& operator << ( ostream& str, const tnlTuple< Size, Real >& v ); template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple() { bzero( data, Size * sizeof( Real ) ); }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) { if( Size == 1 ) @@ -161,6 +251,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple( const Real& v ) { if( Size == 1 ) @@ -181,6 +274,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real& v ) }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) { if( Size == 1 ) @@ -205,28 +301,37 @@ tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2 ) { tnlAssert( Size == 2, - cerr << "Using this constructor does not makes sense for Size different then 2.") + printf( "Using this constructor does not makes sense for Size different then 2.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2, const Real& v3 ) { tnlAssert( Size == 3, - cerr << "Using this constructor does not makes sense for Size different then 3.") + printf( "Using this constructor does not makes sense for Size different then 3.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; data[ 2 ] = v3; } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlString tnlTuple< Size, Real > :: getType() { return tnlString( "tnlTuple< " ) + @@ -237,6 +342,9 @@ tnlString tnlTuple< Size, Real > :: getType() } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& tnlTuple< Size, Real > :: operator[]( int i ) const { assert( i >= 0 && i < Size ); @@ -244,6 +352,9 @@ const Real& tnlTuple< Size, Real > :: operator[]( int i ) const }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& tnlTuple< Size, Real > :: operator[]( int i ) { assert( i < Size ); @@ -251,48 +362,60 @@ Real& tnlTuple< Size, Real > :: operator[]( int i ) }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& tnlTuple< Size, Real > :: x() { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { - cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; + printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& tnlTuple< Size, Real > :: x() const { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { - cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; + printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& tnlTuple< Size, Real > :: y() { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { - cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; + printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& tnlTuple< Size, Real > :: y() const { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { - cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; + printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; @@ -300,30 +423,39 @@ const Real& tnlTuple< Size, Real > :: y() const }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real& tnlTuple< Size, Real > :: z() { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { - cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; + printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif const Real& tnlTuple< Size, Real > :: z() const { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { - cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; + printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& v ) { if( Size == 1 ) @@ -348,6 +480,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& v ) { if( Size == 1 ) @@ -372,6 +507,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) { if( Size == 1 ) @@ -396,6 +534,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u ) const { // TODO: Leads to sigsegv @@ -403,6 +544,9 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u ) const { // TODO: Leads to sigsegv @@ -410,12 +554,18 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator * ( const Real& c ) const { return tnlTuple( * this ) *= c; }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v ) { memcpy( &data[ 0 ], &v. data[ 0 ], Size * sizeof( Real ) ); @@ -426,6 +576,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const { if( Size == 1 ) @@ -448,6 +601,9 @@ Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const template< int Size, typename Real > template< typename Real2 > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) @@ -468,6 +624,9 @@ bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) template< int Size, typename Real > template< typename Real2 > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) @@ -486,6 +645,9 @@ bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) }; template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const { if( Size == 1 ) @@ -507,6 +669,9 @@ bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const { if( Size == 1 ) @@ -525,6 +690,9 @@ bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const { if( Size == 1 ) @@ -543,6 +711,9 @@ bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const } template< int Size, typename Real > +#ifdef HAVE_CUDA + __host__ __device__ +#endif bool tnlTuple< Size, Real > :: operator >= ( const tnlTuple& u ) const { if( Size == 1 ) diff --git a/src/implementation/core/arrays/CMakeLists.txt b/src/implementation/core/arrays/CMakeLists.txt index 7ffaae7c8196ef250cf11b8e9b3fd09937c237a7..69b3be5c4f469c5b45965e9eebed8bfacbbcccd0 100755 --- a/src/implementation/core/arrays/CMakeLists.txt +++ b/src/implementation/core/arrays/CMakeLists.txt @@ -19,6 +19,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu ${CURRENT_DIR}/tnlArray_impl.cu + ${CURRENT_DIR}/tnlMultiArray_impl.cu PARENT_SCOPE ) ELSE() set( common_SOURCES diff --git a/src/implementation/core/arrays/tnlArray_impl.cpp b/src/implementation/core/arrays/tnlArray_impl.cpp index b60b601703943e7d82ff017e1305d54305c026b8..6a429605828f2408f2cac263a8dcba637079be8d 100644 --- a/src/implementation/core/arrays/tnlArray_impl.cpp +++ b/src/implementation/core/arrays/tnlArray_impl.cpp @@ -24,4 +24,11 @@ template class tnlArray< double, tnlHost, int >; template class tnlArray< float, tnlHost, long int >; template class tnlArray< double, tnlHost, long int >; +#ifndef HAVE_CUDA +template class tnlArray< float, tnlCuda, int >; +template class tnlArray< double, tnlCuda, int >; +template class tnlArray< float, tnlCuda, long int >; +template class tnlArray< double, tnlCuda, long int >; +#endif + #endif diff --git a/src/implementation/core/arrays/tnlMultiArray1D_impl.h b/src/implementation/core/arrays/tnlMultiArray1D_impl.h index 5be9b62c6427da339f80b99377d3d4091cfb5786..d1489cf3c1e8cce5a68209458bf2f4fcbfa5a5ca 100644 --- a/src/implementation/core/arrays/tnlMultiArray1D_impl.h +++ b/src/implementation/core/arrays/tnlMultiArray1D_impl.h @@ -18,9 +18,10 @@ #ifndef TNLMULTIARRAY1D_IMPL_H_ #define TNLMULTIARRAY1D_IMPL_H_ - - template< typename Element, typename Device, typename Index > +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray< 1, Element, Device, Index > :: tnlMultiArray() { } @@ -72,7 +73,7 @@ bool tnlMultiArray< 1, Element, Device, Index > :: setLike( const MultiArray& mu template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void tnlMultiArray< 1, Element, Device, Index > :: getDimensions( Index& xSize ) const { @@ -90,13 +91,12 @@ const tnlTuple< 1, Index >& tnlMultiArray< 1, Element, Device, Index > :: getDim template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index tnlMultiArray< 1, Element, Device, Index > :: getElementIndex( const Index i ) const { tnlAssert( i >= 0 && i < this -> dimensions[ 0 ], - cerr << "i = " << i - << "this -> dimensions[ 0 ] " << this -> dimensions[ 0 ] ); + printf( "i = %d this -> dimensions[ 0 ] = %d \n", i, this -> dimensions[ 0 ] ) ); return i; } diff --git a/src/implementation/core/arrays/tnlMultiArray2D_impl.h b/src/implementation/core/arrays/tnlMultiArray2D_impl.h index 07b89e10dd5c8277d581b90aff806feb873474de..2134dd10b95b833863588b4063d0ca5a5bf27487 100644 --- a/src/implementation/core/arrays/tnlMultiArray2D_impl.h +++ b/src/implementation/core/arrays/tnlMultiArray2D_impl.h @@ -21,6 +21,9 @@ template< typename Element, typename Device, typename Index > +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray< 2, Element, Device, Index > :: tnlMultiArray() { } @@ -81,7 +84,7 @@ bool tnlMultiArray< 2, Element, Device, Index > :: setLike( const MultiArray& mu template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void tnlMultiArray< 2, Element, Device, Index > :: getDimensions( Index& jSize, Index& iSize ) const { @@ -91,7 +94,7 @@ void tnlMultiArray< 2, Element, Device, Index > :: getDimensions( Index& jSize, template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 2, Index >& tnlMultiArray< 2, Element, Device, Index > :: getDimensions() const { @@ -100,15 +103,13 @@ const tnlTuple< 2, Index >& tnlMultiArray< 2, Element, Device, Index > :: getDim template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index tnlMultiArray< 2, Element, Device, Index > :: getElementIndex( const Index j, const Index i ) const { tnlAssert( i >= 0 && i < this -> dimensions[ 0 ] && j >= 0 && j < this -> dimensions[ 1 ], - cerr << "i = " << i - << "j = " << j - << "this -> dimensions[ 0 ] = " << this -> dimensions[ 0 ] - << "this -> dimensions[ 1 ] = " << this -> dimensions[ 1 ] ); + printf( "i = %d j = %d this -> dimensions[ 0 ] = %d this -> dimensions[ 1 ] = %d \n", + i, j, this -> dimensions[ 0 ], this -> dimensions[ 1 ] ) ); return j * this -> dimensions[ 0 ] + i; } @@ -126,7 +127,7 @@ void tnlMultiArray< 2, Element, Device, Index > :: setElement( const Index j, co template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& tnlMultiArray< 2, Element, Device, Index > :: operator()( const Index j, const Index i ) { @@ -135,7 +136,7 @@ Element& tnlMultiArray< 2, Element, Device, Index > :: operator()( const Index j template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& tnlMultiArray< 2, Element, Device, Index > :: operator()( const Index j, const Index i ) const { diff --git a/src/implementation/core/arrays/tnlMultiArray3D_impl.h b/src/implementation/core/arrays/tnlMultiArray3D_impl.h index 7df7e75f78764ec3fdf5dec697f71204420735ff..8ecbbd00865a74abf71e8246b6ecfe94bef4b346 100644 --- a/src/implementation/core/arrays/tnlMultiArray3D_impl.h +++ b/src/implementation/core/arrays/tnlMultiArray3D_impl.h @@ -21,6 +21,9 @@ template< typename Element, typename Device, typename Index > +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray< 3, Element, Device, Index > :: tnlMultiArray() { } @@ -86,7 +89,7 @@ bool tnlMultiArray< 3, Element, Device, Index > :: setLike( const MultiArray& mu template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void tnlMultiArray< 3, Element, Device, Index > :: getDimensions( Index& kSize, Index& jSize, @@ -99,7 +102,7 @@ void tnlMultiArray< 3, Element, Device, Index > :: getDimensions( Index& kSize, template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 3, Index >& tnlMultiArray< 3, Element, Device, Index > :: getDimensions() const { @@ -108,7 +111,7 @@ const tnlTuple< 3, Index >& tnlMultiArray< 3, Element, Device, Index > :: getDim template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index tnlMultiArray< 3, Element, Device, Index > :: getElementIndex( const Index k, const Index j, @@ -143,7 +146,7 @@ void tnlMultiArray< 3, Element, Device, Index > :: setElement( const Index k, template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& tnlMultiArray< 3, Element, Device, Index > :: operator()( const Index k, const Index j, @@ -154,7 +157,7 @@ Element& tnlMultiArray< 3, Element, Device, Index > :: operator()( const Index k template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& tnlMultiArray< 3, Element, Device, Index > :: operator()( const Index k, const Index j, diff --git a/src/implementation/core/arrays/tnlMultiArray4D_impl.h b/src/implementation/core/arrays/tnlMultiArray4D_impl.h index c041b5eb461359329ba70632072c5580dd7b05fd..b70f78f93463ab1c7a088c7e2183863a973b08b2 100644 --- a/src/implementation/core/arrays/tnlMultiArray4D_impl.h +++ b/src/implementation/core/arrays/tnlMultiArray4D_impl.h @@ -21,6 +21,9 @@ template< typename Element, typename Device, typename Index > +#ifdef HAVE_CUDA + //__device__ __host__ +#endif tnlMultiArray< 4, Element, Device, Index > :: tnlMultiArray() { } @@ -91,7 +94,7 @@ bool tnlMultiArray< 4, Element, Device, Index > :: setLike( const MultiArray& mu template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif void tnlMultiArray< 4, Element, Device, Index > :: getDimensions( Index& lSize, Index& kSize, @@ -106,7 +109,7 @@ void tnlMultiArray< 4, Element, Device, Index > :: getDimensions( Index& lSize, template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const tnlTuple< 4, Index >& tnlMultiArray< 4, Element, Device, Index > :: getDimensions() const { @@ -115,7 +118,7 @@ const tnlTuple< 4, Index >& tnlMultiArray< 4, Element, Device, Index > :: getDim template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Index tnlMultiArray< 4, Element, Device, Index > :: getElementIndex( const Index l, const Index k, @@ -155,7 +158,7 @@ void tnlMultiArray< 4, Element, Device, Index > :: setElement( const Index l, template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif Element& tnlMultiArray< 4, Element, Device, Index > :: operator()( const Index l, const Index k, @@ -167,7 +170,7 @@ Element& tnlMultiArray< 4, Element, Device, Index > :: operator()( const Index l template< typename Element, typename Device, typename Index > #ifdef HAVE_CUDA - __device__ __host__ + //__device__ __host__ #endif const Element& tnlMultiArray< 4, Element, Device, Index > :: operator()( const Index l, const Index k, diff --git a/src/implementation/core/arrays/tnlMultiArray_impl.cpp b/src/implementation/core/arrays/tnlMultiArray_impl.cpp index 5b1b46c3a4a453381b8ee4cfa46baa6eef60d38e..8d6efafd088aca0c75525dab886a763a0cca5b9a 100644 --- a/src/implementation/core/arrays/tnlMultiArray_impl.cpp +++ b/src/implementation/core/arrays/tnlMultiArray_impl.cpp @@ -36,8 +36,7 @@ template class tnlMultiArray< 4, double, tnlHost, int >; template class tnlMultiArray< 4, float, tnlHost, long int >; template class tnlMultiArray< 4, double, tnlHost, long int >; -#ifdef HAVE_CUDA -#endif +#ifndef HAVE_CUDA template class tnlMultiArray< 1, float, tnlCuda, int >; template class tnlMultiArray< 1, double, tnlCuda, int >; @@ -59,4 +58,7 @@ template class tnlMultiArray< 4, double, tnlCuda, long int >; #endif +#endif + + diff --git a/src/implementation/core/arrays/tnlMultiArray_impl.cu b/src/implementation/core/arrays/tnlMultiArray_impl.cu new file mode 100644 index 0000000000000000000000000000000000000000..29ff2431e5f297a8d8242993b34045bb12a7c5cf --- /dev/null +++ b/src/implementation/core/arrays/tnlMultiArray_impl.cu @@ -0,0 +1,43 @@ +/*************************************************************************** + tnlMultiArray_impl.cu - description + ------------------- + begin : Feb 4, 2014 + copyright : (C) 2014 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <core/arrays/tnlMultiArray.h> + +#ifdef TEMPLATE_EXPLICIT_INSTANTIATION + +#ifdef HAVE_CUDA +// TODO: There are problems with nvlink - it maght be better in later versions +/*template class tnlMultiArray< 1, float, tnlCuda, int >; +template class tnlMultiArray< 1, double, tnlCuda, int >; +template class tnlMultiArray< 1, float, tnlCuda, long int >; +template class tnlMultiArray< 1, double, tnlCuda, long int >; +template class tnlMultiArray< 2, float, tnlCuda, int >; +template class tnlMultiArray< 2, double, tnlCuda, int >; +template class tnlMultiArray< 2, float, tnlCuda, long int >; +template class tnlMultiArray< 2, double, tnlCuda, long int >; +template class tnlMultiArray< 3, float, tnlCuda, int >; +template class tnlMultiArray< 3, double, tnlCuda, int >; +template class tnlMultiArray< 3, float, tnlCuda, long int >; +template class tnlMultiArray< 3, double, tnlCuda, long int >; +template class tnlMultiArray< 4, float, tnlCuda, int >; +template class tnlMultiArray< 4, double, tnlCuda, int >; +template class tnlMultiArray< 4, float, tnlCuda, long int >; +template class tnlMultiArray< 4, double, tnlCuda, long int >;*/ + +#endif + +#endif \ No newline at end of file diff --git a/tests/unit-tests/core/arrays/tnlMultiArrayTester.h b/tests/unit-tests/core/arrays/tnlMultiArrayTester.h index a0b473cd7b34c2c68f9ac452655b096482d3f103..ca421a1ecb57a3571574df187030fa1d5e24ce28 100644 --- a/tests/unit-tests/core/arrays/tnlMultiArrayTester.h +++ b/tests/unit-tests/core/arrays/tnlMultiArrayTester.h @@ -18,6 +18,8 @@ #ifndef TNLMULTIARRAYTESTER_H_ #define TNLMULTIARRAYTESTER_H_ +#ifdef HAVE_CPPUNIT + #include <cppunit/TestSuite.h> #include <cppunit/TestResult.h> #include <cppunit/TestCaller.h> @@ -27,12 +29,35 @@ #include <core/tnlFile.h> #ifdef HAVE_CUDA -template< int Dimensions, typename ElementType, typename IndexType > -__global__ void testSetGetElementKernel( tnlMultiArray< Dimensions, ElementType, tnlCuda, IndexType >* u ); -#endif +template< typename ElementType, typename IndexType > +__global__ void testSetGetElementKernel( tnlMultiArray< 1, ElementType, tnlCuda, IndexType >* u ) +{ + if( threadIdx.x < ( *u ).getDimensions().x() ) + ( *u )( threadIdx.x ) = threadIdx.x; +} +template< typename ElementType, typename IndexType > +__global__ void testSetGetElementKernel( tnlMultiArray< 2, ElementType, tnlCuda, IndexType >* u ) +{ + /*if( threadIdx.x < ( *u ).getDimensions().x() && + threadIdx.x < ( *u ).getDimensions().y() ) + ( *u )( threadIdx.x, threadIdx.x ) = threadIdx.x; + */ +} -template< int Dimension, typename ElementType, typename Device, typename IndexType > +template< typename ElementType, typename IndexType > +__global__ void testSetGetElementKernel( tnlMultiArray< 3, ElementType, tnlCuda, IndexType >* u ) +{ + /*if( threadIdx.x < ( *u ).getDimensions().x() && + threadIdx.x < ( *u ).getDimensions().y() && + threadIdx.x < ( *u ).getDimensions().z() ) + ( *u )( threadIdx.x, threadIdx.x, threadIdx.x ) = threadIdx.x; + */ +} + +#endif /* HAVE_CUDA */ + +template< int Dimensions, typename ElementType, typename Device, typename IndexType > class tnlMultiArrayTester : public CppUnit :: TestCase { public: @@ -63,29 +88,69 @@ class tnlMultiArrayTester : public CppUnit :: TestCase void testConstructorDestructor() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u; + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u; } void testSetSize() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u, v; + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u, v; u. setSize( 10 ); v. setSize( 10 ); } + void setDiagonalElement( tnlMultiArray< 1, ElementType, Device, IndexType >& u, + const IndexType& i, + const ElementType& v ) + { + u.setElement( i, v ); + } + + void setDiagonalElement( tnlMultiArray< 2, ElementType, Device, IndexType >& u, + const IndexType& i, + const ElementType& v ) + { + u.setElement( i, i, v ); + } + + void setDiagonalElement( tnlMultiArray< 3, ElementType, Device, IndexType >& u, + const IndexType& i, + const ElementType& v ) + { + u.setElement( i, i, i, v ); + } + + IndexType getDiagonalElement( tnlMultiArray< 1, ElementType, Device, IndexType >& u, + const IndexType& i ) + { + return u.getElement( i ); + } + + IndexType getDiagonalElement( tnlMultiArray< 2, ElementType, Device, IndexType >& u, + const IndexType& i ) + { + return u.getElement( i, i ); + } + + IndexType getDiagonalElement( tnlMultiArray< 3, ElementType, Device, IndexType >& u, + const IndexType& i ) + { + return u.getElement( i, i, i ); + } + + void testSetGetElement() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: u" ); u. setSize( 10 ); - if( Device::getDevice() == tnlDeviceHost ) + if( Device::getDevice() == tnlHostDevice ) { for( int i = 0; i < 10; i ++ ) - u. setElement( i, i ); + this->setDiagonalElement( u, i, i ); } - if( Device::getDevice() == tnlDeviceCuda ) + if( Device::getDevice() == tnlCudaDevice ) { #ifdef HAVE_CUDA - tnlArray< ElementType, Device, IndexType >* kernel_u = + tnlMultiArray< Dimensions, ElementType, Device, IndexType >* kernel_u = tnlCuda::passToDevice( u ); testSetGetElementKernel<<< 1, 16 >>>( kernel_u ); tnlCuda::freeFromDevice( kernel_u ); @@ -93,22 +158,22 @@ class tnlMultiArrayTester : public CppUnit :: TestCase #endif } for( int i = 0; i < 10; i ++ ) - CPPUNIT_ASSERT( u. getElement( i ) == i ); + CPPUNIT_ASSERT( getDiagonalElement( u, i ) == i ); }; void testComparisonOperator() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: u" ); - tnlMultiArray< Dimension, ElementType, Device, IndexType > v( "tnlMultiArrayTester :: v" ); - tnlMultiArray< Dimension, ElementType, Device, IndexType > w( "tnlMultiArrayTester :: w" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > v( "tnlMultiArrayTester :: v" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > w( "tnlMultiArrayTester :: w" ); u. setSize( 10 ); v. setSize( 10 ); w. setSize( 10 ); for( int i = 0; i < 10; i ++ ) { - u. setElement( i, i ); - v. setElement( i, i ); - w. setElement( i, 2*1 ); + setDiagonalElement( u, i, i ); + setDiagonalElement( v, i, i ); + setDiagonalElement( w, i, 2*1 ); } CPPUNIT_ASSERT( u == v ); CPPUNIT_ASSERT( ! ( u != v ) ); @@ -118,14 +183,14 @@ class tnlMultiArrayTester : public CppUnit :: TestCase void testEquivalenceOperator() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u; - tnlMultiArray< Dimension, ElementType, Device, IndexType > v; + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u; + tnlMultiArray< Dimensions, ElementType, Device, IndexType > v; u. setName( "tnlMultiArrayTester :: testEquivalenceOperator :: u" ); v. setName( "tnlMultiArrayTester :: testEquivalenceOperator :: v" ); u. setSize( 10 ); v. setSize( 10 ); for( int i = 0; i < 10; i ++ ) - u. setElement( i, i ); + setDiagonalElement( u, i, i ); v = u; //CPPUNIT_ASSERT( u == v ); //CPPUNIT_ASSERT( ! ( u != v ) ); @@ -133,7 +198,7 @@ class tnlMultiArrayTester : public CppUnit :: TestCase void testGetSize() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testSetSize - u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testSetSize - u" ); const int maxSize = 10; for( int i = 0; i < maxSize; i ++ ) u. setSize( i ); @@ -143,7 +208,7 @@ class tnlMultiArrayTester : public CppUnit :: TestCase void testReset() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testReset - u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testReset - u" ); u. setSize( 100 ); CPPUNIT_ASSERT( u. getSize() == 100 ); u. reset(); @@ -159,22 +224,22 @@ class tnlMultiArrayTester : public CppUnit :: TestCase { for( int i = 0; i < 100; i ++ ) { - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testSetSizeAndDestructor - u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "tnlMultiArrayTester :: testSetSizeAndDestructor - u" ); u. setSize( i ); } } void testSaveAndLoad() { - tnlMultiArray< Dimension, ElementType, Device, IndexType > v( "test-array-v" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > v( "test-array-v" ); v. setSize( 100 ); for( int i = 0; i < 100; i ++ ) - v. setElement( i, 3.14147 ); + setDiagonalElement( v, i, 3.14147 ); tnlFile file; file. open( "test-file.tnl", tnlWriteMode ); v. save( file ); file. close(); - tnlMultiArray< Dimension, ElementType, Device, IndexType > u( "test-array-u" ); + tnlMultiArray< Dimensions, ElementType, Device, IndexType > u( "test-array-u" ); file. open( "test-file.tnl", tnlReadMode ); u. load( file ); file. close(); @@ -182,30 +247,6 @@ class tnlMultiArrayTester : public CppUnit :: TestCase } }; -#ifdef HAVE_CUDA -template< typename ElementType, typename IndexType > -__global__ void testSetGetElementKernel( tnlMultiArray< 1, ElementType, tnlCuda, IndexType >* u ) -{ - if( threadIdx.x < ( *u ).getDimensions().x() ) - ( *u )( threadIdx.x ) = threadIdx.x; -} - -__global__ void testSetGetElementKernel( tnlMultiArray< 2, ElementType, tnlCuda, IndexType >* u ) -{ - if( threadIdx.x < ( *u ).getDimensions().x() && - threadIdx.x < ( *u ).getDimensions().y() ) - ( *u )( threadIdx.x, threadIdx.x ) = threadIdx.x; -} - -__global__ void testSetGetElementKernel( tnlMultiArray< 3, ElementType, tnlCuda, IndexType >* u ) -{ - if( threadIdx.x < ( *u ).getDimensions().x() && - threadIdx.x < ( *u ).getDimensions().y() && - threadIdx.x < ( *u ).getDimensions().z() ) - ( *u )( threadIdx.x, threadIdx.x, threadIdx.x ) = threadIdx.x; -} - -#endif /* HAVE_CUDA */ #else /* HAVE_CPPUNIT */ template< int, Dimensions, typename ElementType, typename Device, typename IndexType >