Loading build +23 −6 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ HELP="no" VERBOSE="" WITH_CLANG="no" WITH_MPI="yes" WITH_CUDA="yes" WITH_CUDA_ARCH="auto" WITH_OPENMP="yes" Loading Loading @@ -42,6 +43,7 @@ do --verbose ) VERBOSE="VERBOSE=1" ;; --help ) HELP="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; --with-mic=* ) WITH_MIC="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; Loading Loading @@ -76,12 +78,13 @@ then echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default." echo " --install=yes/no Enables the installation of TNL files." echo " --with-mic=yes/no Enable MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enable CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/30/35/... Choose CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enable OpenMP. 'yes' by default." echo " --with-tests=yes/no Enable unit tests. 'yes' by default." echo " --with-coverage=yes/no Enable code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-mpi=yes/no Enables MPI. 'no' by default (Intel Compiler required)." echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/30/35/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." echo " --with-tests=yes/no Enables unit tests. 'yes' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-examples=yes/no Compile the 'examples' directory. 'yes' by default." echo " --with-templates-instantiation=yes/no Precompiles some TNL templates during the build. 'no' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." Loading @@ -98,6 +101,20 @@ then export CC=clang fi if test ${WITH_MPI} = "yes"; then if ! [ -x "$(command -v mpic++)" ]; then echo "Warning:mpic++ is not installed on this system. MPI support is turned off." else export CXX=mpic++ fi if ! [ -x "$(command -v mpicc)" ]; then echo "Warning: mpicc is not installed on this system." else export CC=mpicc fi fi echo "Configuring ${BUILD} $TARGET ..." Loading src/TNL/Communicators/NoDistrCommunicator.h +3 −3 Original line number Diff line number Diff line Loading @@ -72,13 +72,13 @@ namespace Communicators { static Request ISend( const T *data, int count, int dest) { return 1; }; } template <typename T> static Request IRecv( const T *data, int count, int src) { return 1; }; } static void WaitAll(Request *reqs, int length) { Loading @@ -87,7 +87,7 @@ namespace Communicators { template< typename T > static void Bcast( T& data, int count, int root) { }; } /* template< typename T > static void Allreduce( T& data, Loading src/TNL/Containers/Algorithms/ArrayOperations.h +2 −0 Original line number Diff line number Diff line Loading @@ -81,10 +81,12 @@ class ArrayOperations< Devices::Cuda > static void freeMemory( Element* data ); template< typename Element > __cuda_callable__ static void setMemoryElement( Element* data, const Element& value ); template< typename Element > __cuda_callable__ static Element getMemoryElement( const Element* data ); // TODO: does not make sense for CUDA - remove? Loading src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +10 −2 Original line number Diff line number Diff line Loading @@ -60,24 +60,32 @@ freeMemory( Element* data ) } template< typename Element > void __cuda_callable__ void ArrayOperations< Devices::Cuda >:: setMemoryElement( Element* data, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); #ifdef __CUDAARCH__ *data = value; #else ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 ); #endif } template< typename Element > Element __cuda_callable__ Element ArrayOperations< Devices::Cuda >:: getMemoryElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); #ifdef __CUDAARCH__ return *data; #else Element result; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 ); return result; #endif } template< typename Element, typename Index > Loading src/TNL/Containers/StaticVector.h +18 −1 Original line number Diff line number Diff line Loading @@ -58,6 +58,10 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -156,6 +160,10 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -257,6 +265,10 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Adding operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -358,6 +370,11 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading
build +23 −6 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ HELP="no" VERBOSE="" WITH_CLANG="no" WITH_MPI="yes" WITH_CUDA="yes" WITH_CUDA_ARCH="auto" WITH_OPENMP="yes" Loading Loading @@ -42,6 +43,7 @@ do --verbose ) VERBOSE="VERBOSE=1" ;; --help ) HELP="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; --with-mic=* ) WITH_MIC="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; Loading Loading @@ -76,12 +78,13 @@ then echo " --build-jobs=NUM Number of processes to be used for the build. It is set to the number of available CPU cores by default." echo " --prefix=PATH Prefix for the installation directory. ${HOME}/local by default." echo " --install=yes/no Enables the installation of TNL files." echo " --with-mic=yes/no Enable MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enable CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/30/35/... Choose CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enable OpenMP. 'yes' by default." echo " --with-tests=yes/no Enable unit tests. 'yes' by default." echo " --with-coverage=yes/no Enable code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-mpi=yes/no Enables MPI. 'no' by default (Intel Compiler required)." echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/30/35/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." echo " --with-tests=yes/no Enables unit tests. 'yes' by default." echo " --with-coverage=yes/no Enables code coverage reports for unit tests. 'no' by default (lcov is required)." echo " --with-examples=yes/no Compile the 'examples' directory. 'yes' by default." echo " --with-templates-instantiation=yes/no Precompiles some TNL templates during the build. 'no' by default." echo " --cmake=CMAKE Path to cmake. 'cmake' by default." Loading @@ -98,6 +101,20 @@ then export CC=clang fi if test ${WITH_MPI} = "yes"; then if ! [ -x "$(command -v mpic++)" ]; then echo "Warning:mpic++ is not installed on this system. MPI support is turned off." else export CXX=mpic++ fi if ! [ -x "$(command -v mpicc)" ]; then echo "Warning: mpicc is not installed on this system." else export CC=mpicc fi fi echo "Configuring ${BUILD} $TARGET ..." Loading
src/TNL/Communicators/NoDistrCommunicator.h +3 −3 Original line number Diff line number Diff line Loading @@ -72,13 +72,13 @@ namespace Communicators { static Request ISend( const T *data, int count, int dest) { return 1; }; } template <typename T> static Request IRecv( const T *data, int count, int src) { return 1; }; } static void WaitAll(Request *reqs, int length) { Loading @@ -87,7 +87,7 @@ namespace Communicators { template< typename T > static void Bcast( T& data, int count, int root) { }; } /* template< typename T > static void Allreduce( T& data, Loading
src/TNL/Containers/Algorithms/ArrayOperations.h +2 −0 Original line number Diff line number Diff line Loading @@ -81,10 +81,12 @@ class ArrayOperations< Devices::Cuda > static void freeMemory( Element* data ); template< typename Element > __cuda_callable__ static void setMemoryElement( Element* data, const Element& value ); template< typename Element > __cuda_callable__ static Element getMemoryElement( const Element* data ); // TODO: does not make sense for CUDA - remove? Loading
src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +10 −2 Original line number Diff line number Diff line Loading @@ -60,24 +60,32 @@ freeMemory( Element* data ) } template< typename Element > void __cuda_callable__ void ArrayOperations< Devices::Cuda >:: setMemoryElement( Element* data, const Element& value ) { TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); #ifdef __CUDAARCH__ *data = value; #else ArrayOperations< Devices::Cuda >::setMemory( data, value, 1 ); #endif } template< typename Element > Element __cuda_callable__ Element ArrayOperations< Devices::Cuda >:: getMemoryElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); #ifdef __CUDAARCH__ return *data; #else Element result; ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< Element, Element, int >( &result, data, 1 ); return result; #endif } template< typename Element, typename Index > Loading
src/TNL/Containers/StaticVector.h +18 −1 Original line number Diff line number Diff line Loading @@ -58,6 +58,10 @@ class StaticVector : public StaticArray< Size, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -156,6 +160,10 @@ class StaticVector< 1, Real > : public StaticArray< 1, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -257,6 +265,10 @@ class StaticVector< 2, Real > : public StaticArray< 2, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Adding operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading Loading @@ -358,6 +370,11 @@ class StaticVector< 3, Real > : public StaticArray< 3, Real > __cuda_callable__ StaticVector& operator *= ( const Real& c ); //! Division by number __cuda_callable__ StaticVector& operator /= ( const Real& c ); //! Addition operator __cuda_callable__ StaticVector operator + ( const StaticVector& u ) const; Loading