Loading CMakeLists.txt +10 −16 Original line number Diff line number Diff line cmake_minimum_required( VERSION 2.8 ) cmake_minimum_required( VERSION 2.8.10 ) project( tnl ) Loading Loading @@ -33,6 +33,7 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) endif() if( WITH_CUDA STREQUAL "yes" ) #AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 --relocatable-device-code=yes --device-c" ) AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" ) else() AddCompilerFlag( "-std=gnu++0x" ) Loading @@ -45,21 +46,14 @@ if( NOT WITH_CUDA STREQUAL "no" ) find_package( CUDA ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) if( CUDA_ARCHITECTURE STREQUAL "1.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10) endif() if( CUDA_ARCHITECTURE STREQUAL "1.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_11;-DCUDA_ARCH=11) endif() if( CUDA_ARCHITECTURE STREQUAL "1.2" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_12;-DCUDA_ARCH=12) endif() if( CUDA_ARCHITECTURE STREQUAL "1.3" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_13;-DCUDA_ARCH=13) endif() set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--shared;--compiler-options -fPIC) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_20;-DCUDA_ARCH=20) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_20;-DCUDA_ARCH=20) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() if( CUDA_ARCHITECTURE STREQUAL "2.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21) Loading install +1 −2 Original line number Diff line number Diff line Loading @@ -6,12 +6,11 @@ WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes #VERBOSE="VERBOSE=1" VERBOSE="VERBOSE=1" CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS="1" echo "Building $TARGET using $CPUS processors." if [ ! -d Debug ]; Loading src/CMakeLists.txt +4 −4 Original line number Diff line number Diff line Loading @@ -29,8 +29,8 @@ set( tnl_CUDA__SOURCES ${tnl_generators_CUDA__SOURCES} if( BUILD_CUDA ) CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} ) CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} OPTIONS -arch sm_20 ) else( BUILD_CUDA ) ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_SOURCES} ) Loading @@ -46,8 +46,8 @@ INSTALL( TARGETS tnl${debugExt}-${tnlVersion} DESTINATION lib ) IF( BUILD_MPI ) if( BUILD_CUDA ) CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} ) CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} OPTIONS -arch sm_20 ) else( BUILD_CUDA ) ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_SOURCES} ) Loading src/core/arrays/tnlArray.h +15 −0 Original line number Diff line number Diff line Loading @@ -55,14 +55,23 @@ class tnlArray : public virtual tnlObject void reset(); #ifdef HAVE_CUDA __device__ __host__ #endif Index getSize() const; void setElement( const Index i, const Element& x ); Element getElement( Index i ) const; #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator[] ( Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator[] ( Index i ) const; tnlArray< Element, Device, Index >& operator = ( const tnlArray< Element, Device, Index >& array ); Loading @@ -78,8 +87,14 @@ class tnlArray : public virtual tnlObject void setValue( const Element& e ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element* getData() const; #ifdef HAVE_CUDA __device__ __host__ #endif Element* getData(); /*! Loading src/core/arrays/tnlMultiArray.h +64 −7 Original line number Diff line number Diff line Loading @@ -48,34 +48,46 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 1, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 1, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index i ) const; void setElement( const Index i, Element value ); //! This method can be used for general access to the elements of the arrays. /*! It does not return reference but value. So it can be used to access * arrays in different adress space (usualy GPU device). * arrays in different address space (usually GPU device). * See also operator(). */ Element getElement( const Index i ) const; //! Operator for accessing elements of the array. /*! It returns reference to given elements so it cannot be * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index i ) const; template< typename MultiArray > bool operator == ( const MultiArray& array ) const; Loading Loading @@ -121,14 +133,23 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 2, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& jSize, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 2, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index j, const Index i ) const; void setElement( const Index j, const Index i, Element value ); Loading @@ -142,11 +163,17 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev //! Operator for accessing elements of the array. /*! It returns reference to given elements so it cannot be * used to access elements of arrays in different adress space * (GPU device usualy). * used to access elements of arrays in different address space * (GPU device usually). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index j, const Index i ) const; template< typename MultiArray > Loading Loading @@ -195,14 +222,23 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 3, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 3, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index k, const Index j, const Index i ) const; void setElement( const Index k, const Index j, const Index i, Element value ); Loading @@ -219,8 +255,14 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index k, const Index j, const Index i ) const; template< typename MultiArray > Loading Loading @@ -269,14 +311,23 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 4, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 4, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const; void setElement( const Index l, const Index k, const Index j, const Index i, Element value ); Loading @@ -293,8 +344,14 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index l, const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const; template< typename MultiArray > Loading Loading
CMakeLists.txt +10 −16 Original line number Diff line number Diff line cmake_minimum_required( VERSION 2.8 ) cmake_minimum_required( VERSION 2.8.10 ) project( tnl ) Loading Loading @@ -33,6 +33,7 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) endif() if( WITH_CUDA STREQUAL "yes" ) #AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 --relocatable-device-code=yes --device-c" ) AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" ) else() AddCompilerFlag( "-std=gnu++0x" ) Loading @@ -45,21 +46,14 @@ if( NOT WITH_CUDA STREQUAL "no" ) find_package( CUDA ) if( CUDA_FOUND ) set( BUILD_CUDA TRUE) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA ) if( CUDA_ARCHITECTURE STREQUAL "1.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_10;-DCUDA_ARCH=10) endif() if( CUDA_ARCHITECTURE STREQUAL "1.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_11;-DCUDA_ARCH=11) endif() if( CUDA_ARCHITECTURE STREQUAL "1.2" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_12;-DCUDA_ARCH=12) endif() if( CUDA_ARCHITECTURE STREQUAL "1.3" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_13;-DCUDA_ARCH=13) endif() set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--shared;--compiler-options -fPIC) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_20;-DCUDA_ARCH=20) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_20;-DCUDA_ARCH=20) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() if( CUDA_ARCHITECTURE STREQUAL "2.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21) Loading
install +1 −2 Original line number Diff line number Diff line Loading @@ -6,12 +6,11 @@ WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes #VERBOSE="VERBOSE=1" VERBOSE="VERBOSE=1" CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS="1" echo "Building $TARGET using $CPUS processors." if [ ! -d Debug ]; Loading
src/CMakeLists.txt +4 −4 Original line number Diff line number Diff line Loading @@ -29,8 +29,8 @@ set( tnl_CUDA__SOURCES ${tnl_generators_CUDA__SOURCES} if( BUILD_CUDA ) CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} ) CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} OPTIONS -arch sm_20 ) else( BUILD_CUDA ) ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_SOURCES} ) Loading @@ -46,8 +46,8 @@ INSTALL( TARGETS tnl${debugExt}-${tnlVersion} DESTINATION lib ) IF( BUILD_MPI ) if( BUILD_CUDA ) CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} ) CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} OPTIONS -arch sm_20 ) else( BUILD_CUDA ) ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_SOURCES} ) Loading
src/core/arrays/tnlArray.h +15 −0 Original line number Diff line number Diff line Loading @@ -55,14 +55,23 @@ class tnlArray : public virtual tnlObject void reset(); #ifdef HAVE_CUDA __device__ __host__ #endif Index getSize() const; void setElement( const Index i, const Element& x ); Element getElement( Index i ) const; #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator[] ( Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator[] ( Index i ) const; tnlArray< Element, Device, Index >& operator = ( const tnlArray< Element, Device, Index >& array ); Loading @@ -78,8 +87,14 @@ class tnlArray : public virtual tnlObject void setValue( const Element& e ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element* getData() const; #ifdef HAVE_CUDA __device__ __host__ #endif Element* getData(); /*! Loading
src/core/arrays/tnlMultiArray.h +64 −7 Original line number Diff line number Diff line Loading @@ -48,34 +48,46 @@ class tnlMultiArray< 1, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 1, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 1, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index i ) const; void setElement( const Index i, Element value ); //! This method can be used for general access to the elements of the arrays. /*! It does not return reference but value. So it can be used to access * arrays in different adress space (usualy GPU device). * arrays in different address space (usually GPU device). * See also operator(). */ Element getElement( const Index i ) const; //! Operator for accessing elements of the array. /*! It returns reference to given elements so it cannot be * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index i ) const; template< typename MultiArray > bool operator == ( const MultiArray& array ) const; Loading Loading @@ -121,14 +133,23 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 2, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& jSize, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 2, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index j, const Index i ) const; void setElement( const Index j, const Index i, Element value ); Loading @@ -142,11 +163,17 @@ class tnlMultiArray< 2, Element, Device, Index > : public tnlArray< Element, Dev //! Operator for accessing elements of the array. /*! It returns reference to given elements so it cannot be * used to access elements of arrays in different adress space * (GPU device usualy). * used to access elements of arrays in different address space * (GPU device usually). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index j, const Index i ) const; template< typename MultiArray > Loading Loading @@ -195,14 +222,23 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 3, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 3, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index k, const Index j, const Index i ) const; void setElement( const Index k, const Index j, const Index i, Element value ); Loading @@ -219,8 +255,14 @@ class tnlMultiArray< 3, Element, Device, Index > : public tnlArray< Element, Dev * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index k, const Index j, const Index i ) const; template< typename MultiArray > Loading Loading @@ -269,14 +311,23 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev bool setDimensions( const tnlTuple< 4, Index >& dimensions ); #ifdef HAVE_CUDA __device__ __host__ #endif void getDimensions( Index& l, Index& k, Index& j, Index& iSize ) const; #ifdef HAVE_CUDA __device__ __host__ #endif const tnlTuple< 4, Index >& getDimensions() const; //! Set dimensions of the array using another array as a template template< typename MultiArray > bool setLike( const MultiArray& v ); #ifdef HAVE_CUDA __device__ __host__ #endif Index getElementIndex( const Index l, const Index k, const Index j, const Index i ) const; void setElement( const Index l, const Index k, const Index j, const Index i, Element value ); Loading @@ -293,8 +344,14 @@ class tnlMultiArray< 4, Element, Device, Index > : public tnlArray< Element, Dev * used to access elements of arrays in different adress space * (GPU device usualy). */ #ifdef HAVE_CUDA __device__ __host__ #endif Element& operator()( const Index l, const Index k, const Index j, const Index i ); #ifdef HAVE_CUDA __device__ __host__ #endif const Element& operator()( const Index l, const Index k, const Index j, const Index i ) const; template< typename MultiArray > Loading