Loading CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() Loading install +10 −10 Original line number Diff line number Diff line Loading @@ -36,14 +36,14 @@ make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install #cd ../Release #${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ # -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ # -DWITH_CUDA=${WITH_CUDA} \ # -DWITH_CUSPARSE=${WITH_CUSPARSE} \ # -DPETSC_DIR=${PETSC_DIR} \ # -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} #make -j${CPUS} ${VERBOSE} #make -j${CPUS} test #make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install src/core/arrays/tnlMultiArray.h +32 −22 Original line number Diff line number Diff line Loading @@ -38,6 +38,9 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -49,7 +52,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 1, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& iSize ) const; Loading @@ -63,7 +66,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index i ) const; Loading Loading @@ -123,6 +126,9 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -134,12 +140,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 2, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& jSize, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 2, Index >& getDimensions() const; Loading @@ -148,7 +154,7 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index j, const Index i ) const; Loading @@ -167,12 +173,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usually). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index j, const Index i ) const; Loading Loading @@ -212,6 +218,9 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -223,12 +232,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 3, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 3, Index >& getDimensions() const; Loading @@ -237,7 +246,7 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index k, const Index j, const Index i ) const; Loading @@ -256,12 +265,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index k, const Index j, const Index i ) const; Loading Loading @@ -301,6 +310,9 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -312,12 +324,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 4, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 4, Index >& getDimensions() const; Loading @@ -326,7 +338,7 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ // __device__ __host__ #endif Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const; Loading @@ -345,12 +357,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index l, const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const; Loading Loading @@ -417,10 +429,8 @@ extern template class tnlMultiArray< 4, double, tnlHost, int >; extern template class tnlMultiArray< 4, float, tnlHost, long int >; extern template class tnlMultiArray< 4, double, tnlHost, long int >; #ifdef HAVE_CUDA #endif extern template class tnlMultiArray< 1, float, tnlCuda, int >; // TODO: There are problems with nvlink - it maght be better in later versions /*extern template class tnlMultiArray< 1, float, tnlCuda, int >; extern template class tnlMultiArray< 1, double, tnlCuda, int >; extern template class tnlMultiArray< 1, float, tnlCuda, long int >; extern template class tnlMultiArray< 1, double, tnlCuda, long int >; Loading @@ -435,7 +445,7 @@ extern template class tnlMultiArray< 3, double, tnlCuda, long int >; extern template class tnlMultiArray< 4, float, tnlCuda, int >; extern template class tnlMultiArray< 4, double, tnlCuda, int >; extern template class tnlMultiArray< 4, float, tnlCuda, long int >; extern template class tnlMultiArray< 4, double, tnlCuda, long int >; extern template class tnlMultiArray< 4, double, tnlCuda, long int >;*/ #endif Loading src/core/tnlTuple.h +180 −9 Original line number Diff line number Diff line Loading @@ -33,87 +33,171 @@ class tnlTuple public: typedef Real RealType; #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple(); #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real v[ Size ] ); //! This sets all vector components to v #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v ); //! Copy constructor #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const tnlTuple< Size, Real >& v ); //! This is constructore of vector with Size = 2. #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v1, const Real& v2 ); //! This is constructore of vector with Size = 3. //! This is constructore of vector with Size = 3 #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v1, const Real& v2, const Real& v3 ); static tnlString getType(); #ifdef HAVE_CUDA __host__ __device__ #endif const Real& operator[]( int i ) const; #ifdef HAVE_CUDA __host__ __device__ #endif Real& operator[]( int i ); //! Returns the first coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& x(); //! Returns the first coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& x() const; //! Returns the second coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& y(); //! Returns the second coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& y() const; //! Returns the third coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& z(); //! Returns the third coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& z() const; //! Adding operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator += ( const tnlTuple& v ); //! Subtracting operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator -= ( const tnlTuple& v ); //! Multiplication with number #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator *= ( const Real& c ); //! Adding operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator + ( const tnlTuple& u ) const; //! Subtracting operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator - ( const tnlTuple& u ) const; //! Multiplication with number #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator * ( const Real& c ) const; //! #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator = ( const tnlTuple& v ); //! Scalar product #ifdef HAVE_CUDA __host__ __device__ #endif Real operator * ( const tnlTuple& u ) const; //! Comparison operator template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool operator == ( const tnlTuple< Size, Real2 >& v ) const; //! Comparison operator template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool operator != ( const tnlTuple< Size, Real2 >& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator < ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator <= ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator > ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator >= ( const tnlTuple& v ) const; bool save( tnlFile& file ) const; Loading @@ -132,12 +216,18 @@ template< int Size, typename Real > ostream& operator << ( ostream& str, const tnlTuple< Size, Real >& v ); template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple() { bzero( data, Size * sizeof( Real ) ); }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) { if( Size == 1 ) Loading @@ -161,6 +251,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v ) { if( Size == 1 ) Loading @@ -181,6 +274,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real& v ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) { if( Size == 1 ) Loading @@ -205,28 +301,37 @@ tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2 ) { tnlAssert( Size == 2, cerr << "Using this constructor does not makes sense for Size different then 2.") printf( "Using this constructor does not makes sense for Size different then 2.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2, const Real& v3 ) { tnlAssert( Size == 3, cerr << "Using this constructor does not makes sense for Size different then 3.") printf( "Using this constructor does not makes sense for Size different then 3.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; data[ 2 ] = v3; } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlString tnlTuple< Size, Real > :: getType() { return tnlString( "tnlTuple< " ) + Loading @@ -237,6 +342,9 @@ tnlString tnlTuple< Size, Real > :: getType() } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: operator[]( int i ) const { assert( i >= 0 && i < Size ); Loading @@ -244,6 +352,9 @@ const Real& tnlTuple< Size, Real > :: operator[]( int i ) const }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: operator[]( int i ) { assert( i < Size ); Loading @@ -251,48 +362,60 @@ Real& tnlTuple< Size, Real > :: operator[]( int i ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: x() { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: x() const { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: y() { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: y() const { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; Loading @@ -300,30 +423,39 @@ const Real& tnlTuple< Size, Real > :: y() const }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: z() { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: z() const { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& v ) { if( Size == 1 ) Loading @@ -348,6 +480,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& v ) { if( Size == 1 ) Loading @@ -372,6 +507,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) { if( Size == 1 ) Loading @@ -396,6 +534,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u ) const { // TODO: Leads to sigsegv Loading @@ -403,6 +544,9 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u ) const { // TODO: Leads to sigsegv Loading @@ -410,12 +554,18 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator * ( const Real& c ) const { return tnlTuple( * this ) *= c; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v ) { memcpy( &data[ 0 ], &v. data[ 0 ], Size * sizeof( Real ) ); Loading @@ -426,6 +576,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -448,6 +601,9 @@ Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const template< int Size, typename Real > template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) Loading @@ -468,6 +624,9 @@ bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) template< int Size, typename Real > template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) Loading @@ -486,6 +645,9 @@ bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -507,6 +669,9 @@ bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -525,6 +690,9 @@ bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -543,6 +711,9 @@ bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator >= ( const tnlTuple& u ) const { if( Size == 1 ) Loading src/implementation/core/arrays/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu ${CURRENT_DIR}/tnlArray_impl.cu ${CURRENT_DIR}/tnlMultiArray_impl.cu PARENT_SCOPE ) ELSE() set( common_SOURCES Loading Loading
CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -49,7 +49,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;-keep) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() Loading
install +10 −10 Original line number Diff line number Diff line Loading @@ -36,14 +36,14 @@ make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install #cd ../Release #${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ # -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ # -DWITH_CUDA=${WITH_CUDA} \ # -DWITH_CUSPARSE=${WITH_CUSPARSE} \ # -DPETSC_DIR=${PETSC_DIR} \ # -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} #make -j${CPUS} ${VERBOSE} #make -j${CPUS} test #make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install
src/core/arrays/tnlMultiArray.h +32 −22 Original line number Diff line number Diff line Loading @@ -38,6 +38,9 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -49,7 +52,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 1, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& iSize ) const; Loading @@ -63,7 +66,7 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index i ) const; Loading Loading @@ -123,6 +126,9 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -134,12 +140,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 2, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& jSize, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 2, Index >& getDimensions() const; Loading @@ -148,7 +154,7 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index j, const Index i ) const; Loading @@ -167,12 +173,12 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usually). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index j, const Index i ) const; Loading Loading @@ -212,6 +218,9 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -223,12 +232,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 3, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 3, Index >& getDimensions() const; Loading @@ -237,7 +246,7 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Index getElementIndex( const Index k, const Index j, const Index i ) const; Loading @@ -256,12 +265,12 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index k, const Index j, const Index i ) const; Loading Loading @@ -301,6 +310,9 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev typedef Device DeviceType; typedef Index IndexType; #ifdef HAVE_CUDA //__device__ __host__ #endif tnlMultiArray(); tnlMultiArray( const tnlString& name ); Loading @@ -312,12 +324,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 4, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const tnlTuple< 4, Index >& getDimensions() const; Loading @@ -326,7 +338,7 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ // __device__ __host__ #endif Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const; Loading @@ -345,12 +357,12 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif Element& operator()( const Index l, const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ //__device__ __host__ #endif const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const; Loading Loading @@ -417,10 +429,8 @@ extern template class tnlMultiArray< 4, double, tnlHost, int >; extern template class tnlMultiArray< 4, float, tnlHost, long int >; extern template class tnlMultiArray< 4, double, tnlHost, long int >; #ifdef HAVE_CUDA #endif extern template class tnlMultiArray< 1, float, tnlCuda, int >; // TODO: There are problems with nvlink - it maght be better in later versions /*extern template class tnlMultiArray< 1, float, tnlCuda, int >; extern template class tnlMultiArray< 1, double, tnlCuda, int >; extern template class tnlMultiArray< 1, float, tnlCuda, long int >; extern template class tnlMultiArray< 1, double, tnlCuda, long int >; Loading @@ -435,7 +445,7 @@ extern template class tnlMultiArray< 3, double, tnlCuda, long int >; extern template class tnlMultiArray< 4, float, tnlCuda, int >; extern template class tnlMultiArray< 4, double, tnlCuda, int >; extern template class tnlMultiArray< 4, float, tnlCuda, long int >; extern template class tnlMultiArray< 4, double, tnlCuda, long int >; extern template class tnlMultiArray< 4, double, tnlCuda, long int >;*/ #endif Loading
src/core/tnlTuple.h +180 −9 Original line number Diff line number Diff line Loading @@ -33,87 +33,171 @@ class tnlTuple public: typedef Real RealType; #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple(); #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real v[ Size ] ); //! This sets all vector components to v #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v ); //! Copy constructor #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const tnlTuple< Size, Real >& v ); //! This is constructore of vector with Size = 2. #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v1, const Real& v2 ); //! This is constructore of vector with Size = 3. //! This is constructore of vector with Size = 3 #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple( const Real& v1, const Real& v2, const Real& v3 ); static tnlString getType(); #ifdef HAVE_CUDA __host__ __device__ #endif const Real& operator[]( int i ) const; #ifdef HAVE_CUDA __host__ __device__ #endif Real& operator[]( int i ); //! Returns the first coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& x(); //! Returns the first coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& x() const; //! Returns the second coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& y(); //! Returns the second coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& y() const; //! Returns the third coordinate #ifdef HAVE_CUDA __host__ __device__ #endif Real& z(); //! Returns the third coordinate #ifdef HAVE_CUDA __host__ __device__ #endif const Real& z() const; //! Adding operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator += ( const tnlTuple& v ); //! Subtracting operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator -= ( const tnlTuple& v ); //! Multiplication with number #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator *= ( const Real& c ); //! Adding operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator + ( const tnlTuple& u ) const; //! Subtracting operator #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator - ( const tnlTuple& u ) const; //! Multiplication with number #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple operator * ( const Real& c ) const; //! #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple& operator = ( const tnlTuple& v ); //! Scalar product #ifdef HAVE_CUDA __host__ __device__ #endif Real operator * ( const tnlTuple& u ) const; //! Comparison operator template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool operator == ( const tnlTuple< Size, Real2 >& v ) const; //! Comparison operator template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool operator != ( const tnlTuple< Size, Real2 >& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator < ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator <= ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator > ( const tnlTuple& v ) const; #ifdef HAVE_CUDA __host__ __device__ #endif bool operator >= ( const tnlTuple& v ) const; bool save( tnlFile& file ) const; Loading @@ -132,12 +216,18 @@ template< int Size, typename Real > ostream& operator << ( ostream& str, const tnlTuple< Size, Real >& v ); template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple() { bzero( data, Size * sizeof( Real ) ); }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) { if( Size == 1 ) Loading @@ -161,6 +251,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real v[ Size ] ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v ) { if( Size == 1 ) Loading @@ -181,6 +274,9 @@ tnlTuple< Size, Real > :: tnlTuple( const Real& v ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) { if( Size == 1 ) Loading @@ -205,28 +301,37 @@ tnlTuple< Size, Real > :: tnlTuple( const tnlTuple< Size, Real >& v ) template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2 ) { tnlAssert( Size == 2, cerr << "Using this constructor does not makes sense for Size different then 2.") printf( "Using this constructor does not makes sense for Size different then 2.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > :: tnlTuple( const Real& v1, const Real& v2, const Real& v3 ) { tnlAssert( Size == 3, cerr << "Using this constructor does not makes sense for Size different then 3.") printf( "Using this constructor does not makes sense for Size different then 3.\n") ); data[ 0 ] = v1; data[ 1 ] = v2; data[ 2 ] = v3; } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlString tnlTuple< Size, Real > :: getType() { return tnlString( "tnlTuple< " ) + Loading @@ -237,6 +342,9 @@ tnlString tnlTuple< Size, Real > :: getType() } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: operator[]( int i ) const { assert( i >= 0 && i < Size ); Loading @@ -244,6 +352,9 @@ const Real& tnlTuple< Size, Real > :: operator[]( int i ) const }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: operator[]( int i ) { assert( i < Size ); Loading @@ -251,48 +362,60 @@ Real& tnlTuple< Size, Real > :: operator[]( int i ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: x() { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: x() const { tnlAssert( Size > 0, cerr << "Size = " << Size << endl; ); if( Size < 1 ) { cerr << "The size of the tnlTuple is too small to get x coordinate." << endl; printf( "The size of the tnlTuple is too small to get x coordinate.\n" ); abort(); } return data[ 0 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: y() { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: y() const { tnlAssert( Size > 1, cerr << "Size = " << Size << endl; ); if( Size < 2 ) { cerr << "The size of the tnlTuple is too small to get y coordinate." << endl; printf( "The size of the tnlTuple is too small to get y coordinate.\n" ); abort(); } return data[ 1 ]; Loading @@ -300,30 +423,39 @@ const Real& tnlTuple< Size, Real > :: y() const }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real& tnlTuple< Size, Real > :: z() { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif const Real& tnlTuple< Size, Real > :: z() const { tnlAssert( Size > 2, cerr << "Size = " << Size << endl; ); if( Size < 3 ) { cerr << "The size of the tnlTuple is too small to get z coordinate." << endl; printf( "The size of the tnlTuple is too small to get z coordinate.\n" ); abort(); } return data[ 2 ]; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& v ) { if( Size == 1 ) Loading @@ -348,6 +480,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator += ( const tnlTuple& }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& v ) { if( Size == 1 ) Loading @@ -372,6 +507,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator -= ( const tnlTuple& }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) { if( Size == 1 ) Loading @@ -396,6 +534,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator *= ( const Real& c ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u ) const { // TODO: Leads to sigsegv Loading @@ -403,6 +544,9 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator + ( const tnlTuple& u }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u ) const { // TODO: Leads to sigsegv Loading @@ -410,12 +554,18 @@ tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator - ( const tnlTuple& u }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real > tnlTuple< Size, Real > :: operator * ( const Real& c ) const { return tnlTuple( * this ) *= c; }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v ) { memcpy( &data[ 0 ], &v. data[ 0 ], Size * sizeof( Real ) ); Loading @@ -426,6 +576,9 @@ tnlTuple< Size, Real >& tnlTuple< Size, Real > :: operator = ( const tnlTuple& v }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -448,6 +601,9 @@ Real tnlTuple< Size, Real > :: operator * ( const tnlTuple& u ) const template< int Size, typename Real > template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) Loading @@ -468,6 +624,9 @@ bool tnlTuple< Size, Real > :: operator == ( const tnlTuple< Size, Real2 >& u ) template< int Size, typename Real > template< typename Real2 > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) const { if( Size == 1 ) Loading @@ -486,6 +645,9 @@ bool tnlTuple< Size, Real > :: operator != ( const tnlTuple< Size, Real2 >& u ) }; template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -507,6 +669,9 @@ bool tnlTuple< Size, Real > :: operator < ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -525,6 +690,9 @@ bool tnlTuple< Size, Real > :: operator <= ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const { if( Size == 1 ) Loading @@ -543,6 +711,9 @@ bool tnlTuple< Size, Real > :: operator > ( const tnlTuple& u ) const } template< int Size, typename Real > #ifdef HAVE_CUDA __host__ __device__ #endif bool tnlTuple< Size, Real > :: operator >= ( const tnlTuple& u ) const { if( Size == 1 ) Loading
src/implementation/core/arrays/CMakeLists.txt +1 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,7 @@ IF( BUILD_CUDA ) ${CURRENT_DIR}/tnlArrayOperationsHost_impl.cu ${CURRENT_DIR}/tnlArrayOperationsCuda_impl.cu ${CURRENT_DIR}/tnlArray_impl.cu ${CURRENT_DIR}/tnlMultiArray_impl.cu PARENT_SCOPE ) ELSE() set( common_SOURCES Loading