diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d260486c83a221fb34a161103ad9e5686f98d2d7..5882f50c99e56ca1a178c21507c8bef0e42684c7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,7 +21,6 @@ stages: WITH_OPENMP: "no" WITH_CUDA: "no" WITH_CUDA_ARCH: "auto" - WITH_MIC: "no" WITH_MPI: "no" # configurations WITH_TESTS: "no" @@ -46,6 +45,8 @@ stages: fi - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 + # enforce (more or less) warning-free builds + - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. @@ -56,7 +57,6 @@ stages: -DWITH_MPI=${WITH_MPI} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} - -DWITH_MIC=${WITH_MIC} -DWITH_TESTS=${WITH_TESTS} -DWITH_DOC=${WITH_DOC} -DWITH_COVERAGE=${WITH_COVERAGE} diff --git a/CMakeLists.txt b/CMakeLists.txt index 9540fe0028c4647db0e16cb7c7864c119772c2fd..78c7f3dcd94c0113ed52b9ae6ffd35141a121c45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,6 @@ set( tnlVersion "0.1" ) # declare all custom build options option(OFFLINE_BUILD "Offline build (i.e. 
without downloading libraries such as pybind11)" OFF) -option(WITH_MIC "Build with MIC support" OFF) option(WITH_CUDA "Build with CUDA support" ON) set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures") option(WITH_OPENMP "Build with OpenMP support" ON) @@ -83,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set Debug/Release options -set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) +set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" ) # pass -rdynamic only in Debug mode @@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} ) endif() endif() -if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570") - ##### - # Check for MIC - # - if( ${WITH_MIC} ) - message( "Enabled MIC support." 
) - set( MIC_CXX_FLAGS "-DHAVE_MIC") - # build all tests with MIC support - set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC ) - set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" ) - else() - set( MIC_CXX_FLAGS "") - endif() -endif() - # force colorized output in continuous integration if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" ) message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.") @@ -355,7 +338,6 @@ INCLUDE( CPack ) # Print custom build options message( "-- Build options:" ) message( " OFFLINE_BUILD = ${OFFLINE_BUILD}" ) -message( " WITH_MIC = ${WITH_MIC}" ) message( " WITH_CUDA = ${WITH_CUDA}" ) message( " WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" ) message( " WITH_OPENMP = ${WITH_OPENMP}" ) diff --git a/Documentation/Examples/FileExampleCuda.cu b/Documentation/Examples/FileExampleCuda.cu index 0cfde8fc8bc3220cee4b7edae5532cd3ab204d70..4411b8c51bd0e1ed2285dfb2829b81416d35303f 100644 --- a/Documentation/Examples/FileExampleCuda.cu +++ b/Documentation/Examples/FileExampleCuda.cu @@ -17,7 +17,7 @@ int main() */ File file; file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc ); - file.save< double, double, Devices::Host >( doubleArray, size ); + file.save< double, double, Allocators::Host< double > >( doubleArray, size ); file.close(); /*** @@ -31,7 +31,7 @@ int main() * Read array from the file to device */ file.open( "file-example-cuda-test-file.tnl", std::ios_base::in ); - file.load< double, double, Devices::Cuda >( deviceArray, size ); + file.load< double, double, Allocators::Cuda< double > >( deviceArray, size ); file.close(); /*** diff --git a/Documentation/Examples/FileExampleSaveAndLoad.cpp b/Documentation/Examples/FileExampleSaveAndLoad.cpp index 00e353218b241e60659d78ad829ee78704b7641f..c232fc3fe7d76b13b6c488da369937f3a64c4f08 100644 --- a/Documentation/Examples/FileExampleSaveAndLoad.cpp +++ 
b/Documentation/Examples/FileExampleSaveAndLoad.cpp @@ -18,21 +18,21 @@ int main() */ File file; file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc ); - file.save< double, float, Devices::Host >( doubleArray, size ); + file.save< double, float >( doubleArray, size ); file.close(); /*** * Load the array of floats from the file. */ file.open( "test-file.tnl", std::ios_base::in ); - file.load< float, float, Devices::Host >( floatArray, size ); + file.load< float, float >( floatArray, size ); file.close(); /*** * Load the array of floats from the file and convert them to integers. */ file.open( "test-file.tnl", std::ios_base::in ); - file.load< int, float, Devices::Host >( intArray, size ); + file.load< int, float >( intArray, size ); file.close(); /*** diff --git a/Documentation/Examples/ObjectExample_getType.cpp b/Documentation/Examples/ObjectExample_getType.cpp index 7cc7476d6cc90debc1e495eab8b84959619881f7..7c45a167d751812d37aa9eca3316c777ef70567a 100644 --- a/Documentation/Examples/ObjectExample_getType.cpp +++ b/Documentation/Examples/ObjectExample_getType.cpp @@ -1,5 +1,5 @@ #include <iostream> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> #include <TNL/Object.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> @@ -13,24 +13,12 @@ class MyArray : public Object { public: - using HostType = MyArray< Value, Devices::Host >; - - static String getType() - { - return "MyArray< " + TNL::getType< Value >() + ", " + TNL::getType< Device >() + " >"; - } - - String getTypeVirtual() const - { - return getType(); - } - static String getSerializationType() { - return HostType::getType(); + return "MyArray< " + TNL::getType< Value >() + ", " + getType< Devices::Host >() + " >"; } - String getSerializationTypeVirtual() const + virtual String getSerializationTypeVirtual() const override { return getSerializationType(); } @@ -47,11 +35,11 @@ int main() Object* cudaArrayPtr = &cudaArray; // Object types - cout << "HostArray type is " << 
HostArray::getType() << endl; - cout << "hostArrayPtr type is " << hostArrayPtr->getTypeVirtual() << endl; + cout << "HostArray type is " << getType< HostArray >() << endl; + cout << "hostArrayPtr type is " << getType( *hostArrayPtr ) << endl; - cout << "CudaArray type is " << CudaArray::getType() << endl; - cout << "cudaArrayPtr type is " << cudaArrayPtr->getTypeVirtual() << endl; + cout << "CudaArray type is " << getType< CudaArray >() << endl; + cout << "cudaArrayPtr type is " << getType( *cudaArrayPtr ) << endl; // Object serialization types cout << "HostArray serialization type is " << HostArray::getSerializationType() << endl; @@ -60,4 +48,3 @@ int main() cout << "CudaArray serialization type is " << CudaArray::getSerializationType() << endl; cout << "cudaArrayPtr serialization type is " << cudaArrayPtr->getSerializationTypeVirtual() << endl; } - diff --git a/Documentation/Examples/StringExample.cpp b/Documentation/Examples/StringExample.cpp index 609e2a26981362a663f2a92e6a82c6f86a94e41c..a86182d6574cde1d8ee2e65f2d0a14d251c837a7 100644 --- a/Documentation/Examples/StringExample.cpp +++ b/Documentation/Examples/StringExample.cpp @@ -1,6 +1,5 @@ #include <iostream> #include <TNL/String.h> -#include <TNL/Containers/List.h> #include <TNL/File.h> using namespace TNL; diff --git a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp index d0b66adb4febd515b5296d36909f551b58c8dc3b..4c3a17268cc1107aaca84d911cb1f4b4f5cb8a28 100644 --- a/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ComparisonExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename 
Device > bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp index 9ccb5baa86c26dabb155226207b6dfd551c595dc..29817aa1427405142a2feb07362f9ad443fa4b39 100644 --- a/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ExclusiveScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void scan( Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp index 8d21107fdc444872783c057a329f5c812f83527e..2fb76623836dc553d26d2f7e0fb49b0755b7ea79 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-1.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp index f44cac9180fb68543750be923e1b9bfc9cd85324..10fb0b49966fa2ccbb8b9eb6273ca726076499d2 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-2.cpp @@ -1,12 +1,12 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> 
+#include <TNL/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp index 1125b605c8264b69377c2c5c1c31a385d7033db9..de8c4bab658c42f38a199c1f95075490101e3420 100644 --- a/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MapReduceExample-3.cpp @@ -1,12 +1,12 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> #include <TNL/Timer.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double mapReduce( Vector< double, Device >& u ) diff --git a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp index 8e4dbc740fe4070788509bcfd951dd759a98dea9..ca4b8c8a481ec49e23d7770923dd19c27316f7fa 100644 --- a/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/MaximumNormExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double maximumNorm( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp index 
8be11efa32111e298ba0616f8634ba359802c2d9..e2691e40a7544306322841a69047d9d5d0b52dee 100644 --- a/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ProductExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double product( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp index e37a21b6e4f44d8ea4acce8a8c10a8cb1d9061c1..000af86feb4a0b96357a6ce2a4c1dd0c8829b5d3 100644 --- a/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ReductionWithArgument.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > std::pair< int, double > diff --git a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp index 5bfd759edcab7f70f3940918a03eb732f75a689b..c072e09ba4d2f1c48c051eb6979db5956376ccf6 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScalarProductExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; 
-using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double scalarProduct( const Vector< double, Device >& u, const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp index 38f44ccddac7eea5a92395180804ebefb1bef17b..3dbd8581d1932933e20f3011b226fe1f3ce9bcf6 100644 --- a/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/ScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void scan( Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp index b3f67763906d7770cb65cf36a637eb0379928736..5e1379f5d572007beeee1fdcc6671c1240cc8973 100644 --- a/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SegmentedScanExample.cpp @@ -4,7 +4,7 @@ using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags ) diff --git a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp index 3be04bd9245c8aef312d421f2bb4d68e4aacbea2..3cf648a5774e17331b9716196939c6c988496ff2 100644 --- a/Documentation/Tutorials/ReductionAndScan/SumExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/SumExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; 
-using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double sum( const Vector< double, Device >& v ) diff --git a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp index bf93cd58b40e38a6d77df29f8aeb2c0d344664a9..4e44abe570bd4d701cd2712d8730374dbf940c24 100644 --- a/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp +++ b/Documentation/Tutorials/ReductionAndScan/UpdateAndResidueExample.cpp @@ -1,11 +1,11 @@ #include <iostream> #include <cstdlib> #include <TNL/Containers/Vector.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename Device > double updateAndResidue( Vector< double, Device >& u, const Vector< double, Device >& delta_u, const double& tau ) diff --git a/Documentation/Tutorials/Vectors/Reduction.cpp b/Documentation/Tutorials/Vectors/Reduction.cpp index 1d76d8d0405d815d01b9f5ddc155694d76e64067..33768b07f456e38e14b8bfadd7466233075de47f 100644 --- a/Documentation/Tutorials/Vectors/Reduction.cpp +++ b/Documentation/Tutorials/Vectors/Reduction.cpp @@ -24,7 +24,6 @@ void expressions() b.evaluate( [] __cuda_callable__ ( int i )->RealType { return i - 5.0; } ); c = -5; - int arg; std::cout << "a = " << a << std::endl; std::cout << "b = " << b << std::endl; std::cout << "c = " << c << std::endl; diff --git a/README.md b/README.md index a8a4b749e4aa518e7501db2690c6a791691bba77..371782d714aa228b2cc3b125df21500bc5b8fe34 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,20 @@ Similarly to the STL, features provided by the TNL can be grouped into several modules: - _Core concepts_. - The main concept used in the TNL is the `Device` type which is used in most of - the other parts of the library. 
For data structures such as `Array` it - specifies where the data should be allocated, whereas for algorithms such as - `ParallelFor` it specifies how the algorithm should be executed. + The main concepts used in TNL are the _memory space_, which represents the + part of memory where given data is allocated, and the _execution model_, + which represents the way how given (typically parallel) algorithm is executed. + For example, data can be allocated in the main system memory, in the GPU + memory, or using the CUDA Unified Memory which can be accessed from the host + as well as from the GPU. On the other hand, algorithms can be executed using + either the host CPU or an accelerator (GPU), and for each there are many ways + to manage parallel execution. The usage of memory spaces is abstracted with + [allocators][allocators] and the execution model is represented by + [devices][devices]. See the [Core concepts][core concepts] page for details. - _[Containers][containers]_. TNL provides generic containers such as array, multidimensional array or array - views, which abstract data management on different hardware architectures. + views, which abstract data management and execution of common operations on + different hardware architectures. - _Linear algebra._ TNL provides generic data structures and algorithms for linear algebra, such as [vectors][vectors], [sparse matrices][matrices], @@ -39,6 +46,9 @@ several modules: [libpng](http://www.libpng.org/pub/png/libpng.html) for PNG files, or [libjpeg](http://libjpeg.sourceforge.net/) for JPEG files. 
+[allocators]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Allocators.html +[devices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Devices.html +[core concepts]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/core_concepts.html [containers]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Containers.html [vectors]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/classTNL_1_1Containers_1_1Vector.html [matrices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Matrices.html diff --git a/build b/build index c1e0d3162a9585c41a1050d30cecc442fb8d2173..914c65b1971bd0b895f5c2aa6f093ee83dd132a2 100755 --- a/build +++ b/build @@ -48,7 +48,6 @@ do --offline-build ) OFFLINE_BUILD="yes" ;; --with-clang=* ) WITH_CLANG="${option#*=}" ;; --with-mpi=* ) WITH_MPI="${option#*=}" ;; - --with-mic=* ) WITH_MIC="${option#*=}" ;; --with-cuda=* ) WITH_CUDA="${option#*=}" ;; --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";; --with-openmp=* ) WITH_OPENMP="${option#*=}" ;; @@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then echo " --install=yes/no Enables the installation of TNL files." echo " --offline-build=yes/no Disables online updates during the build. 'no' by default." echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)." - echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)." echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)." echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default." echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default." @@ -110,7 +108,12 @@ else export CC=gcc fi -if hash ninja 2>/dev/null; then +if [[ ! $(command -v cmake) ]]; then + echo "Error: cmake is not installed. 
See http://www.cmake.org/download/" >&2 + exit 1 +fi + +if [[ $(command -v ninja) ]]; then generator=Ninja make=ninja check_file="build.ninja" @@ -126,7 +129,6 @@ cmake_command=( -DCMAKE_BUILD_TYPE=${BUILD} -DCMAKE_INSTALL_PREFIX=${PREFIX} -DOFFLINE_BUILD=${OFFLINE_BUILD} - -DWITH_MIC=${WITH_MIC} -DWITH_CUDA=${WITH_CUDA} -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH} -DWITH_OPENMP=${WITH_OPENMP} diff --git a/install b/install index 6f0770367953585ee30ad634dd308b4a952eadeb..fe138dfaa005539a87e7ccbb9a8746143c4cbb0e 100755 --- a/install +++ b/install @@ -1,59 +1,51 @@ #!/bin/bash +set -e + BUILD_DEBUG="yes" BUILD_RELEASE="yes" OPTIONS="" -CMAKE_TEST=`which cmake` -if test x${CMAKE_TEST} = "x"; -then - echo "Cmake is not installed on your system. Please install it by:" - echo "" - echo " sudo apt-get install cmake on Ubuntu and Debian based systems" - echo " sudo yum install cmake on RedHat, Fedora or CentOS" - echo " sudo zypper install cmake on OpenSuse" - echo "" - echo "You may also install it from the source code at:" - echo " http://www.cmake.org/download/" - exit 1 -fi - -for option in "$@" -do - case $option in - --no-debug ) BUILD_DEBUG="no" ;; - --no-release ) BUILD_RELEASE="no" ;; - * ) OPTIONS="${OPTIONS} ${option}" ;; - esac +for option in "$@"; do + case $option in + --no-debug) + BUILD_DEBUG="no" + ;; + --no-release) + BUILD_RELEASE="no" + ;; + --build=* ) + BUILD="${option#*=}" + if [[ "$BUILD" != "Release" ]]; then + BUILD_RELEASE="no" + fi + if [[ "$BUILD" != "Debug" ]]; then + BUILD_DEBUG="no" + fi + ;; + *) + OPTIONS="${OPTIONS} ${option}" + ;; + esac done -if test ${BUILD_DEBUG} = "yes"; -then - if [ ! -d Debug ]; - then - mkdir Debug - fi - cd Debug - if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS} - then - exit 1 - fi - cd .. +if [[ ${BUILD_DEBUG} == "yes" ]]; then + if [[ ! -d Debug ]]; then + mkdir Debug + fi + pushd Debug + ../build --root-dir=.. 
--build=Debug --install=yes ${OPTIONS} + popd fi -if test ${BUILD_RELEASE} = "yes"; -then - if [ ! -d Release ]; - then - mkdir Release - fi - cd Release - if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; - then - exit 1 - fi - cd .. +if [[ ${BUILD_RELEASE} == "yes" ]]; then + if [[ ! -d Release ]]; then + mkdir Release + fi + pushd Release + ../build --root-dir=.. --build=Release --install=yes ${OPTIONS}; + popd fi diff --git a/src/Benchmarks/BLAS/CommonVectorOperations.hpp b/src/Benchmarks/BLAS/CommonVectorOperations.hpp index 640fda337b5d8a8a6dcec75f081702eccb45464c..13a0f63229dd45d2726318f77f3bd0a2b92519cd 100644 --- a/src/Benchmarks/BLAS/CommonVectorOperations.hpp +++ b/src/Benchmarks/BLAS/CommonVectorOperations.hpp @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> #include "CommonVectorOperations.h" namespace TNL { @@ -30,7 +30,7 @@ getVectorMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -47,7 +47,7 @@ getVectorMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( 
v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; }; - return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( 
ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > @@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v, const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); }; - return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -155,7 +155,7 @@ getVectorSum( const Vector& v ) const auto* data = v.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - 
return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); } template< typename Device > @@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1, const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); } template< typename Device > @@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 
); } template< typename Device > @@ -276,7 +276,7 @@ getVectorDifferenceL2Norm( const Vector1& v1, auto diff = data1[ i ] - data2[ i ]; return diff * diff; }; - return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); + return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) ); } template< typename Device > @@ -302,7 +302,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); }; - return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); + return std::pow( Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p ); } template< typename Device > @@ -321,7 +321,7 @@ getVectorDifferenceSum( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } template< typename Device > @@ -340,7 +340,7 @@ getScalarProduct( const Vector1& v1, const auto* data1 = v1.getData(); const auto* data2 = v2.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] * data2[ i ]; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); + return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ); } } // namespace Benchmarks diff --git 
a/src/Benchmarks/BLAS/VectorOperations.h b/src/Benchmarks/BLAS/VectorOperations.h index 0ad2c1ee6829aefbc184a1725afb75e665110724..4c9ad6cc5778cc6ff1bbfd873165d5add56ed17b 100644 --- a/src/Benchmarks/BLAS/VectorOperations.h +++ b/src/Benchmarks/BLAS/VectorOperations.h @@ -12,7 +12,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Benchmarks { @@ -104,9 +104,9 @@ struct VectorOperations< Devices::Cuda > auto add2 = [=] __cuda_callable__ ( IndexType i ) { y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; }; if( thisMultiplicator == 1.0 ) - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add1 ); else - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _y.getSize(), add2 ); } template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > @@ -131,9 +131,9 @@ struct VectorOperations< Devices::Cuda > auto add2 = [=] __cuda_callable__ ( IndexType i ) { v[ i ] = thisMultiplicator * v[ i ] + multiplicator1 * v1[ i ] + multiplicator2 * v2[ i ]; }; if( thisMultiplicator == 1.0 ) - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add1 ); else - ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (IndexType) 0, _v.getSize(), add2 ); } }; diff --git a/src/Benchmarks/BLAS/array-operations.h b/src/Benchmarks/BLAS/array-operations.h index cff60c8cca5549dc7c0341f94a9d0d137d5dfb0b..84767a7b1f97e96387c25e6e52356259c67505d2 100644 --- a/src/Benchmarks/BLAS/array-operations.h +++ b/src/Benchmarks/BLAS/array-operations.h @@ -12,6 +12,8 @@ #pragma once +#include 
<cstring> + #include "../Benchmarks.h" #include <TNL/Containers/Array.h> @@ -66,6 +68,36 @@ benchmarkArrayOperations( Benchmark & benchmark, reset12(); + if( std::is_fundamental< Real >::value ) { + // std::memcmp + auto compareHost = [&]() { + if( std::memcmp( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ) == 0 ) + resultHost = true; + else + resultHost = false; + }; + benchmark.setOperation( "comparison (memcmp)", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset12, "CPU", compareHost ); + + // std::memcpy and cudaMemcpy + auto copyHost = [&]() { + std::memcpy( hostArray.getData(), hostArray2.getData(), hostArray.getSize() * sizeof(Real) ); + }; + benchmark.setOperation( "copy (memcpy)", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset12, "CPU", copyHost ); +#ifdef HAVE_CUDA + auto copyCuda = [&]() { + cudaMemcpy( deviceArray.getData(), + deviceArray2.getData(), + deviceArray.getSize() * sizeof(Real), + cudaMemcpyDeviceToDevice ); + TNL_CHECK_CUDA_DEVICE; + }; + benchmark.time< Devices::Cuda >( reset12, "GPU", copyCuda ); +#endif + } + + auto compareHost = [&]() { resultHost = (int) ( hostArray == hostArray2 ); }; diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h index b6c91a2470648de91edf58f42cff912e1e2b32bc..d515d52d73d513d87b86d4b743d8b0e27b20e0ca 100644 --- a/src/Benchmarks/BLAS/spmv.h +++ b/src/Benchmarks/BLAS/spmv.h @@ -53,7 +53,7 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix, const int elementsPerRow, const int gridIdx ) { - const int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= matrix->getRows() ) return; int col = rowIdx - elementsPerRow / 2; @@ -73,12 +73,12 @@ void setCudaTestMatrix( Matrix& matrix, typedef typename Matrix::IndexType IndexType; typedef typename Matrix::RealType RealType; 
Pointers::DevicePointer< Matrix > kernel_matrix( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); setCudaTestMatrixKernel< Matrix > <<< cudaGridSize, cudaBlockSize >>> ( &kernel_matrix.template modifyData< Devices::Cuda >(), elementsPerRow, gridIdx ); @@ -109,7 +109,7 @@ benchmarkSpMV( Benchmark & benchmark, CudaVector deviceVector, deviceVector2; // create benchmark group - const std::vector< String > parsedType = parseObjectType( HostMatrix::getType() ); + const std::vector< String > parsedType = parseObjectType( getType< HostMatrix >() ); #ifdef HAVE_CUDA benchmark.createHorizontalGroup( parsedType[ 0 ], 2 ); #else diff --git a/src/Benchmarks/BLAS/tnl-benchmark-blas.h b/src/Benchmarks/BLAS/tnl-benchmark-blas.h index b510c383749c0104c3c87c8c111237dfdc5a35b9..a1bd3e92b0d2e11ac3f5377ffcf199d8a873ad60 100644 --- a/src/Benchmarks/BLAS/tnl-benchmark-blas.h +++ b/src/Benchmarks/BLAS/tnl-benchmark-blas.h @@ -182,7 +182,7 @@ main( int argc, char* argv[] ) runBlasBenchmarks< double >( benchmark, metadata, minSize, maxSize, sizeStepFactor, elementsPerRow ); if( ! benchmark.save( logFile ) ) { - std::cerr << "Failed to write the benchmark results to file '" << parameters.getParameter< String >( "log-file" ) << "'." << std::endl; + std::cerr << "Failed to write the benchmark results to file '" << logFileName << "'." 
<< std::endl; return EXIT_FAILURE; } diff --git a/src/Benchmarks/BLAS/triad.h b/src/Benchmarks/BLAS/triad.h index c107944c893f3d799eabdfbaca5a3d32fd8a599a..3ac747fba5f386654a9558646868b8fb13671690 100644 --- a/src/Benchmarks/BLAS/triad.h +++ b/src/Benchmarks/BLAS/triad.h @@ -73,7 +73,7 @@ benchmarkTriad( Benchmark & benchmark, { a_v[i] = b_v[i] + scalar * c_v[i]; }; - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); a_h = a_d; }; @@ -117,7 +117,7 @@ benchmarkTriad( Benchmark & benchmark, { a_v[i] = b_v[i] + scalar * c_v[i]; }; - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); a_h = a_d; }; @@ -150,7 +150,7 @@ benchmarkTriad( Benchmark & benchmark, }; auto triad = [&]() { - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); }; benchmark.time< Devices::Cuda >( reset, "zero-copy", triad ); @@ -181,7 +181,7 @@ benchmarkTriad( Benchmark & benchmark, }; auto triad = [&]() { - ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( (long) 0, size, kernel ); }; benchmark.time< Devices::Cuda >( reset, "unified memory", triad ); diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index ce2114f313fa75e91ac15d02bb4d3bfdf78aef9a..7254ba9f4075c81f7100e4c6c86bd16c3b9077a7 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -562,31 +562,31 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif //// - // Inclusive prefix sum - auto inclusivePrefixSumHost = [&]() { - hostVector.prefixSum(); + // Inclusive scan + auto inclusiveScanHost = [&]() { + hostVector.scan(); }; - benchmark.setOperation( "inclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( 
reset1, "CPU ET", inclusivePrefixSumHost ); + benchmark.setOperation( "inclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", inclusiveScanHost ); #ifdef HAVE_CUDA - auto inclusivePrefixSumCuda = [&]() { - deviceVector.prefixSum(); + auto inclusiveScanCuda = [&]() { + deviceVector.scan(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", inclusiveScanCuda ); #endif //// - // Exclusive prefix sum - auto exclusivePrefixSumHost = [&]() { - hostVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + // Exclusive scan + auto exclusiveScanHost = [&]() { + hostVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.setOperation( "exclusive prefix sum", 2 * datasetSize ); - benchmark.time< Devices::Host >( reset1, "CPU ET", exclusivePrefixSumHost ); + benchmark.setOperation( "exclusive scan", 2 * datasetSize ); + benchmark.time< Devices::Host >( reset1, "CPU ET", exclusiveScanHost ); #ifdef HAVE_CUDA - auto exclusivePrefixSumCuda = [&]() { - deviceVector.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + auto exclusiveScanCuda = [&]() { + deviceVector.template scan< Algorithms::ScanType::Exclusive >(); }; - benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusivePrefixSumCuda ); + benchmark.time< Devices::Cuda >( reset1, "GPU ET", exclusiveScanCuda ); #endif #ifdef HAVE_CUDA diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h index 683a18376276c4b0dbd194329226e9a517a4af12..4caf0fbda397f92d8cb7c143a12896b89600beb0 100644 --- a/src/Benchmarks/Benchmarks.h +++ b/src/Benchmarks/Benchmarks.h @@ -23,8 +23,8 @@ #include <TNL/String.h> #include <TNL/Devices/Host.h> -#include <TNL/Devices/SystemInfo.h> -#include <TNL/Devices/CudaDeviceInfo.h> +#include <TNL/SystemInfo.h> +#include <TNL/Cuda/DeviceInfo.h> #include <TNL/Config/ConfigDescription.h> #include 
<TNL/Communicators/MpiCommunicator.h> @@ -330,25 +330,25 @@ protected: }; -Benchmark::MetadataMap getHardwareMetadata() +inline Benchmark::MetadataMap getHardwareMetadata() { const int cpu_id = 0; - Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( cpu_id ); + const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " + convertToString( cacheSizes.L3 ); #ifdef HAVE_CUDA - const int activeGPU = Devices::CudaDeviceInfo::getActiveDevice(); - const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + - convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( activeGPU ) ); + const int activeGPU = Cuda::DeviceInfo::getActiveDevice(); + const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( activeGPU ) ) + "." + + convertToString( Cuda::DeviceInfo::getArchitectureMinor( activeGPU ) ); #endif Benchmark::MetadataMap metadata { - { "host name", Devices::SystemInfo::getHostname() }, - { "architecture", Devices::SystemInfo::getArchitecture() }, - { "system", Devices::SystemInfo::getSystemName() }, - { "system release", Devices::SystemInfo::getSystemRelease() }, - { "start time", Devices::SystemInfo::getCurrentTime() }, + { "host name", SystemInfo::getHostname() }, + { "architecture", SystemInfo::getArchitecture() }, + { "system", SystemInfo::getSystemName() }, + { "system release", SystemInfo::getSystemRelease() }, + { "start time", SystemInfo::getCurrentTime() }, #ifdef HAVE_MPI { "number of MPI processes", convertToString( (Communicators::MpiCommunicator::IsInitialized()) ? 
Communicators::MpiCommunicator::GetSize( Communicators::MpiCommunicator::AllGroup ) @@ -356,19 +356,19 @@ Benchmark::MetadataMap getHardwareMetadata() #endif { "OpenMP enabled", convertToString( Devices::Host::isOMPEnabled() ) }, { "OpenMP threads", convertToString( Devices::Host::getMaxThreadsCount() ) }, - { "CPU model name", Devices::SystemInfo::getCPUModelName( cpu_id ) }, - { "CPU cores", convertToString( Devices::SystemInfo::getNumberOfCores( cpu_id ) ) }, - { "CPU threads per core", convertToString( Devices::SystemInfo::getNumberOfThreads( cpu_id ) / Devices::SystemInfo::getNumberOfCores( cpu_id ) ) }, - { "CPU max frequency (MHz)", convertToString( Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, + { "CPU model name", SystemInfo::getCPUModelName( cpu_id ) }, + { "CPU cores", convertToString( SystemInfo::getNumberOfCores( cpu_id ) ) }, + { "CPU threads per core", convertToString( SystemInfo::getNumberOfThreads( cpu_id ) / SystemInfo::getNumberOfCores( cpu_id ) ) }, + { "CPU max frequency (MHz)", convertToString( SystemInfo::getCPUMaxFrequency( cpu_id ) / 1e3 ) }, { "CPU cache sizes (L1d, L1i, L2, L3) (kiB)", cacheInfo }, #ifdef HAVE_CUDA - { "GPU name", Devices::CudaDeviceInfo::getDeviceName( activeGPU ) }, + { "GPU name", Cuda::DeviceInfo::getDeviceName( activeGPU ) }, { "GPU architecture", deviceArch }, - { "GPU CUDA cores", convertToString( Devices::CudaDeviceInfo::getCudaCores( activeGPU ) ) }, - { "GPU clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, - { "GPU global memory (GB)", convertToString( (double) Devices::CudaDeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, - { "GPU memory clock rate (MHz)", convertToString( (double) Devices::CudaDeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, - { "GPU memory ECC enabled", convertToString( Devices::CudaDeviceInfo::getECCEnabled( activeGPU ) ) }, + { "GPU CUDA cores", convertToString( Cuda::DeviceInfo::getCudaCores( activeGPU ) ) }, 
+ { "GPU clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getClockRate( activeGPU ) / 1e3 ) }, + { "GPU global memory (GB)", convertToString( (double) Cuda::DeviceInfo::getGlobalMemory( activeGPU ) / 1e9 ) }, + { "GPU memory clock rate (MHz)", convertToString( (double) Cuda::DeviceInfo::getMemoryClockRate( activeGPU ) / 1e3 ) }, + { "GPU memory ECC enabled", convertToString( Cuda::DeviceInfo::getECCEnabled( activeGPU ) ) }, #endif }; diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h index 81e5d3a6d05aff53fa56e6d6eb045104f9ac3c42..aa4b29424d2b93b323017e5501231a57874ccfa4 100644 --- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -73,8 +73,8 @@ benchmarkSpmvCuda( Benchmark& benchmark, { using RealType = typename Matrix::RealType; using IndexType = typename Matrix::IndexType; - using CudaMatrix = typename Matrix::CudaType; - using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x; cuda_x = x; @@ -125,8 +125,8 @@ benchmarkDistributedSpmvCuda( Benchmark& benchmark, { using RealType = typename Matrix::RealType; using IndexType = typename Matrix::IndexType; - using CudaMatrix = typename Matrix::CudaType; - using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x; cuda_x = x; diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h index 0a5494e2b157bbfa6ae5164b579d17e1f6aab43b..0c2fd92e309b53964a017b498dfd76ddf595e0bf 100644 --- a/src/Benchmarks/HeatEquation/BenchmarkLaplace.h +++ 
b/src/Benchmarks/HeatEquation/BenchmarkLaplace.h @@ -33,8 +33,6 @@ class BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, In typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -81,8 +79,6 @@ class BenchmarkLaplace< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, Ind typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -144,8 +140,6 @@ class BenchmarkLaplace< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Ind typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimension = MeshType::getMeshDimension() }; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h index 34a2e245aad4fcc17c662cf9b72b009df9a7dcfb..47a67744153d970d12d9aee7fe478a16c3141d0d 100644 --- a/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h +++ b/src/Benchmarks/HeatEquation/BenchmarkLaplace_impl.h @@ -4,21 +4,6 @@ /**** * 1D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -108,21 +93,6 @@ setMatrixElements( const RealType& time, /**** * 2D problem */ 
-template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -288,21 +258,6 @@ setMatrixElements( const RealType& time, /**** * 3D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -BenchmarkLaplace< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "BenchmarkLaplace< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h index 998be646d6a296c272f69560ebe75ec507e98dd2..95491a1cfa1d5faf0557447834760fbb80c16340 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem.h @@ -40,8 +40,6 @@ class HeatEquationBenchmarkProblem: using typename BaseType::DofVectorPointer; HeatEquationBenchmarkProblem(); - - static String getType(); String getPrologHeader() const; diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h index 14f1fd8a9e0a38a6aa335aacdbe886a155225811..3f0c9194867d011724e2387c882420e04e798ee6 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h @@ -16,18 +16,6 @@ -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator, - 
typename Communicator > -String -HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >:: -getType() -{ - return String( "HeatEquationBenchmarkProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, @@ -94,9 +82,9 @@ setup( const Config::ParameterContainer& parameters, if( std::is_same< DeviceType, Devices::Cuda >::value ) { - this->cudaBoundaryConditions = Devices::Cuda::passToDevice( *this->boundaryConditionPointer ); - this->cudaRightHandSide = Devices::Cuda::passToDevice( *this->rightHandSidePointer ); - this->cudaDifferentialOperator = Devices::Cuda::passToDevice( *this->differentialOperatorPointer ); + this->cudaBoundaryConditions = Cuda::passToDevice( *this->boundaryConditionPointer ); + this->cudaRightHandSide = Cuda::passToDevice( *this->rightHandSidePointer ); + this->cudaDifferentialOperator = Cuda::passToDevice( *this->differentialOperatorPointer ); } this->explicitUpdater.setDifferentialOperator( this->differentialOperatorPointer ); this->explicitUpdater.setBoundaryConditions( this->boundaryConditionPointer ); @@ -278,8 +266,8 @@ boundaryConditionsTemplatedCompact( const GridType* grid, { typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; + coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( coordinates.x() < end.x() && coordinates.y() < end.y() ) @@ -369,8 +357,8 @@ heatEquationTemplatedCompact( const GridType* grid, typedef typename GridType::IndexType IndexType; typedef typename GridType::RealType RealType; - coordinates.x() = 
begin.x() + ( gridXIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - coordinates.y() = begin.y() + ( gridYIdx * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; + coordinates.x() = begin.x() + ( gridXIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.y() = begin.y() + ( gridYIdx * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; MeshFunction& u = *_u; MeshFunction& fu = *_fu; @@ -495,14 +483,14 @@ getExplicitUpdate( const RealType& time, CellType cell( mesh.template getData< DeviceType >() ); dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); + cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); + cudaBlocks.y = Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); + const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x ); + const IndexType cudaYGrids = Cuda::getNumberOfGrids( cudaBlocks.y ); //std::cerr << "Setting boundary conditions..." 
<< std::endl; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) boundaryConditionsTemplatedCompact< MeshType, CellType, BoundaryCondition, MeshFunctionType > @@ -606,7 +594,7 @@ getExplicitUpdate( const RealType& time, gridYSize / 16 + ( gridYSize % 16 != 0 ) ); */ - TNL::Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int cudaErr; Meshes::Traverser< MeshType, Cell > meshTraverser; meshTraverser.template processInteriorEntities< UserData, @@ -774,10 +762,10 @@ template< typename Mesh, HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator, Communicator >:: ~HeatEquationBenchmarkProblem() { - if( this->cudaMesh ) Devices::Cuda::freeFromDevice( this->cudaMesh ); - if( this->cudaBoundaryConditions ) Devices::Cuda::freeFromDevice( this->cudaBoundaryConditions ); - if( this->cudaRightHandSide ) Devices::Cuda::freeFromDevice( this->cudaRightHandSide ); - if( this->cudaDifferentialOperator ) Devices::Cuda::freeFromDevice( this->cudaDifferentialOperator ); + if( this->cudaMesh ) Cuda::freeFromDevice( this->cudaMesh ); + if( this->cudaBoundaryConditions ) Cuda::freeFromDevice( this->cudaBoundaryConditions ); + if( this->cudaRightHandSide ) Cuda::freeFromDevice( this->cudaRightHandSide ); + if( this->cudaDifferentialOperator ) Cuda::freeFromDevice( this->cudaDifferentialOperator ); } diff --git a/src/Benchmarks/HeatEquation/TestGridEntity.h b/src/Benchmarks/HeatEquation/TestGridEntity.h index 3492b219807f4650ed665b2ee57c77754f5934f1..5be39bac1c342c9445e91c99bc868cd917935a16 100644 --- a/src/Benchmarks/HeatEquation/TestGridEntity.h +++ b/src/Benchmarks/HeatEquation/TestGridEntity.h @@ -78,8 +78,7 @@ class TestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimension typedef 
Containers::StaticVector< meshDimension, IndexType > EntityOrientationType; typedef Containers::StaticVector< meshDimension, IndexType > EntityBasisType; - typedef TestGridEntity< GridType, entityDimension > ThisType; - typedef TestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType; + typedef TestNeighborGridEntitiesStorage< TestGridEntity > NeighborGridEntitiesStorageType; __cuda_callable__ inline TestGridEntity( const GridType& grid ) diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h index cdbc4922ca07eda7da0ba442340705f6646d8430..7e7e5369182ebc579ab98da55bbacef872284edf 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser.h @@ -12,7 +12,7 @@ #include <TNL/Meshes/Grid.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/CudaStreamPool.h> +#include <TNL/Cuda/StreamPool.h> namespace TNL { diff --git a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h index f3d9fbeec528dae97e4f3304f44b8440318d4529..c9fe0e43be20b175321dc1b50c563fb48e843b5d 100644 --- a/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h +++ b/src/Benchmarks/HeatEquation/Tuning/GridTraverser_impl.h @@ -8,8 +8,6 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Devices/MIC.h> - #pragma once #include "GridTraverser.h" @@ -128,8 +126,8 @@ _GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates <= end ) { @@ -175,7 +173,7 @@ _GridTraverser2DBoundary( Index entitiesAlongX = endX - beginX 
+ 1; Index entitiesAlongY = endY - beginY; - Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx ); if( threadId < entitiesAlongX ) { GridEntity entity( *grid, @@ -246,12 +244,12 @@ processEntities( dim3 cudaBlockSize( 256 ); dim3 cudaBlocksCount, cudaGridsCount; IndexType cudaThreadsCount = 2 * ( end.x() - begin.x() + end.y() - begin.y() + 1 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); dim3 gridIdx, cudaGridSize; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); _GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -268,20 +266,20 @@ processEntities( { dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, cudaGridSize; for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); TNL::_GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), diff --git a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h index 67254ab3607c9c318f8d6a624387c5d875ee2484..59de340f2d7b23f1792aa0c48015df66bf405679 100644 --- a/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h +++ b/src/Benchmarks/HeatEquation/Tuning/SimpleCell.h @@ -26,8 +26,7 @@ class SimpleCell typedef typename GridType::IndexType IndexType; typedef typename GridType::CoordinatesType CoordinatesType; typedef typename GridType::PointType PointType; - typedef SimpleCell< GridType, Config > ThisType; - typedef Meshes::NeighborGridEntitiesStorage< ThisType, Config > + typedef Meshes::NeighborGridEntitiesStorage< SimpleCell, Config > NeighborGridEntitiesStorageType; typedef Config ConfigType; diff --git a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h index 084d6cc39a6a755b5ead53d791d22d2c327b37b9..a7a6fe39e4a6cc1ce0f584c71813bfbe070feaad 100644 --- a/src/Benchmarks/HeatEquation/tnlTestGrid2D.h +++ b/src/Benchmarks/HeatEquation/tnlTestGrid2D.h @@ -52,9 +52,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject typedef Index IndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType; - typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType; - typedef Meshes::Grid< 2, Real, Device, Index > ThisType; static const int meshDimension = 2; @@ -78,10 +75,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject Grid(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; @@ -212,29 +205,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid() template< typename Real, typename Device, typename Index > -String Meshes::Grid< 2, Real, 
Device, Index > :: getType() +String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( ::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( ::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, @@ -828,9 +805,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject typedef Index IndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Meshes::Grid< 2, Real, Devices::Host, Index > HostType; - typedef Meshes::Grid< 2, Real, tnlCuda, Index > CudaType; - typedef Meshes::Grid< 2, Real, Device, Index > ThisType; static const int meshDimension = 2; @@ -854,10 +828,6 @@ class Meshes::Grid< 2, Real, Device, Index > : public tnlObject Grid(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; @@ -977,29 +947,13 @@ Meshes::Grid< 2, Real, Device, Index > :: Grid() template< typename Real, typename Device, typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getType() +String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( ::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( ::getType< IndexType >() ) + " >"; -} - -template< 
typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Meshes::Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h index aa8bd8d057309b1cd48fe38d71eab7886ccc0d7c..4401e1e7207f39f51d489de8f16375ba438f97ad 100644 --- a/src/Benchmarks/HeatEquation/tnlTestGridEntity.h +++ b/src/Benchmarks/HeatEquation/tnlTestGridEntity.h @@ -55,8 +55,6 @@ class tnlTestGridEntity< Meshes::Grid< Dimension, Real, Device, Index >, Dimensi typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityOrientationType; typedef TNL::Containers::StaticVector< meshDimension, IndexType > EntityBasisType; - typedef tnlTestGridEntity< GridType, entityDimension, Config > ThisType; - //typedef tnlTestNeighborGridEntitiesStorage< ThisType > NeighborGridEntitiesStorageType; /*template< int NeighborEntityDimension = entityDimension > using NeighborEntities = diff --git a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h index 13c7848decb4f967dd04630a03e62bbb49249efa..a6434a01394e4ebfeb4d296f9640a563be967310 100644 --- a/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h +++ b/src/Benchmarks/HeatEquation/tnlTestNeighbourGridEntityGetter2D_impl.h @@ -93,7 +93,6 @@ class tnlTestNeighborGridEntityGetter< typedef typename GridType::CoordinatesType CoordinatesType; typedef GridEntityGetter< GridType, NeighborGridEntityType > GridEntityGetter; typedef GridEntityStencilStorageTag< GridEntityCrossStencil > StencilStorage; - typedef 
tnlTestNeighborGridEntityGetter< GridEntityType, 2, StencilStorage > ThisType; static const int stencilSize = Config::getStencilSize(); @@ -110,7 +109,7 @@ class tnlTestNeighborGridEntityGetter< public: __cuda_callable__ - static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex ) + static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex ) { neighborEntityGetter.stencilX[ index + stencilSize ] = entityIndex + index; } @@ -122,7 +121,7 @@ class tnlTestNeighborGridEntityGetter< public: __cuda_callable__ - static void exec( ThisType& neighborEntityGetter, const IndexType& entityIndex ) + static void exec( tnlTestNeighborGridEntityGetter& neighborEntityGetter, const IndexType& entityIndex ) { neighborEntityGetter.stencilY[ index + stencilSize ] = entityIndex + index * neighborEntityGetter.entity.getMesh().getDimensions().x(); diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 6661c5f6a8e720df9a265ca5cb3c5b97717294d5..0701b647a42416e6439cd02a1ef10157512210f3 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -119,8 +119,8 @@ benchmarkIterativeSolvers( Benchmark& benchmark, const Vector& b ) { #ifdef HAVE_CUDA - using CudaMatrix = typename Matrix::CudaType; - using CudaVector = typename Vector::CudaType; + using CudaMatrix = typename Matrix::template Self< typename Matrix::RealType, Devices::Cuda >; + using CudaVector = typename Vector::template Self< typename Vector::RealType, Devices::Cuda >; CudaVector cuda_x0, cuda_b; cuda_x0 = x0; @@ -130,7 +130,7 @@ benchmarkIterativeSolvers( Benchmark& benchmark, *cudaMatrixPointer = *matrixPointer; // synchronize shared pointers - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); #endif using namespace Solvers::Linear; @@ -461,9 
+461,11 @@ struct LinearSolversBenchmark SharedPointer< CSR > matrixCopy; Matrices::copySparseMatrix( *matrixCopy, *matrixPointer ); - SharedPointer< typename CSR::CudaType > cuda_matrixCopy; + using CudaCSR = Matrices::CSR< RealType, Devices::Cuda, IndexType >; + using CudaVector = typename VectorType::template Self< RealType, Devices::Cuda >; + SharedPointer< CudaCSR > cuda_matrixCopy; *cuda_matrixCopy = *matrixCopy; - typename VectorType::CudaType cuda_x0, cuda_b; + CudaVector cuda_x0, cuda_b; cuda_x0.setLike( x0 ); cuda_b.setLike( b ); cuda_x0 = x0; diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h index b1fdd8c719077d0eff230081d9b62b92da3c76b4..285dd6f3d7f97aa9f90a0914ca88fe535661d7eb 100644 --- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h +++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray-boundary.h @@ -52,7 +52,8 @@ template< typename Array > void expect_eq( Array& a, Array& b ) { if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) { - typename Array::HostType a_host, b_host; + using HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >; + HostArray a_host, b_host; a_host = a; b_host = b; expect_eq_chunked( a_host, b_host ); diff --git a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h index 0de53ea8815033654194cc9e2eb6f3eaf6356356..0c29b21b5894e46627f8c4f129eafc8697ae9aec 100644 --- a/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h +++ b/src/Benchmarks/NDArray/tnl-benchmark-ndarray.h @@ -14,7 +14,7 @@ #include <TNL/Assert.h> #include <TNL/Math.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/NDArray.h> #include <TNL/Containers/ndarray/Operations.h> @@ -54,7 +54,8 @@ template< typename Array > void expect_eq( Array& a, Array& b ) { if( std::is_same< typename Array::DeviceType, TNL::Devices::Cuda >::value ) { - 
typename Array::HostType a_host, b_host; + using HostArray = typename Array::template Self< typename Array::ValueType, TNL::Devices::Host >; + HostArray a_host, b_host; a_host = a; b_host = b; expect_eq_chunked( a_host, b_host ); @@ -98,7 +99,7 @@ void benchmark_array( Benchmark& benchmark, index_type size = 500000000 ) }; auto f = [&]() { - TNL::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() ); + Algorithms::ParallelFor< Device >::exec( 0, (int) size, kernel, a.getData(), b.getData() ); }; // warm-up for all benchmarks diff --git a/src/Benchmarks/ODESolvers/Euler.h b/src/Benchmarks/ODESolvers/Euler.h index c767eb33ec7b09bab4b53fe5ec54cbf981f73588..2df469d6f2e960cda1844dac35c05ed070c402ce 100644 --- a/src/Benchmarks/ODESolvers/Euler.h +++ b/src/Benchmarks/ODESolvers/Euler.h @@ -38,8 +38,6 @@ class Euler : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor > Euler(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/Benchmarks/ODESolvers/Euler.hpp b/src/Benchmarks/ODESolvers/Euler.hpp index 1066e178c2f150c97514eb04dcd19a5a30932102..ab975ed078c470f4824d18e7848033e6fed73f2c 100644 --- a/src/Benchmarks/ODESolvers/Euler.hpp +++ b/src/Benchmarks/ODESolvers/Euler.hpp @@ -10,7 +10,6 @@ #pragma once -#include <TNL/Devices/MIC.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Communicators/NoDistrCommunicator.h> #include "ComputeBlockResidue.h" @@ -33,14 +32,6 @@ Euler< Problem, SolverMonitor >::Euler() { }; -template< typename Problem, typename SolverMonitor > -String Euler< Problem, SolverMonitor >::getType() -{ - return String( "Euler< " ) + - Problem :: getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Euler< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) @@ -185,10 +176,10 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( 
DofVectorPointer& u, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; localResidue = 0.0; for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) @@ -196,7 +187,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u, const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ); const IndexType gridOffset = gridIdx * threadsPerGrid; const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); - const IndexType currentGridSize = Devices::Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x ); + const IndexType currentGridSize = Cuda::getNumberOfBlocks( currentSize, cudaBlockSize.x ); updateUEuler<<< currentGridSize, cudaBlockSize, sharedMemory >>>( currentSize, tau, @@ -209,28 +200,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u, } #endif } - - //MIC - if( std::is_same< DeviceType, Devices::MIC >::value ) - { -#ifdef HAVE_MIC - Devices::MICHider<RealType> mu; - mu.pointer=_u; - Devices::MICHider<RealType> mk1; - mk1.pointer=_k1; - #pragma offload target(mic) in(mu,mk1,size) inout(localResidue) - { - #pragma omp parallel for reduction(+:localResidue) firstprivate( mu, mk1 ) - for( IndexType i = 0; i < size; i ++ ) - { - const RealType add = tau * mk1.pointer[ i ]; - mu.pointer[ i ] += add; - localResidue += std::fabs( add ); - } - } -#endif - } 
localResidue /= tau * ( RealType ) size; Problem::CommunicatorType::Allreduce( &localResidue, ¤tResidue, 1, MPI_SUM, Problem::CommunicatorType::AllGroup ); //std::cerr << "Local residue = " << localResidue << " - globalResidue = " << currentResidue << std::endl; diff --git a/src/Benchmarks/ODESolvers/Merson.h b/src/Benchmarks/ODESolvers/Merson.h index 8d00667c2aba372525bca19462c49a5e430e3e79..74e052705db27b18ec6e0b61a7fb8e1c863c4032 100644 --- a/src/Benchmarks/ODESolvers/Merson.h +++ b/src/Benchmarks/ODESolvers/Merson.h @@ -35,8 +35,6 @@ class Merson : public Solvers::ODE::ExplicitSolver< Problem, SolverMonitor > Merson(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/Benchmarks/ODESolvers/Merson.hpp b/src/Benchmarks/ODESolvers/Merson.hpp index 3a5cdf660b82d6f48509172fb1eee3e3fadb5760..3c74bdf480c66025f44de201e18ad5368d050e6f 100644 --- a/src/Benchmarks/ODESolvers/Merson.hpp +++ b/src/Benchmarks/ODESolvers/Merson.hpp @@ -94,14 +94,6 @@ Merson< Problem, SolverMonitor >::Merson() } }; -template< typename Problem, typename SolverMonitor > -String Merson< Problem, SolverMonitor >::getType() -{ - return String( "Merson< " ) + - Problem::getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) @@ -298,10 +290,10 @@ void Merson< Problem, SolverMonitor >::computeKFunctions( DofVectorPointer& u, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, 
cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; this->problem->getExplicitUpdate( time, tau, u, k1 ); cudaDeviceSynchronize(); @@ -392,10 +384,10 @@ typename Problem :: RealType Merson< Problem, SolverMonitor >::computeError( con { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -447,10 +439,10 @@ void Merson< Problem, SolverMonitor >::computeNewTimeLevel( const RealType time, { #ifdef HAVE_CUDA dim3 cudaBlockSize( 512 ); - const IndexType cudaBlocks = Devices::Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); - const IndexType cudaGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks ); - this->cudaBlockResidue.setSize( min( cudaBlocks, Devices::Cuda::getMaxGridSize() ) ); - const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; + const IndexType cudaBlocks = Cuda::getNumberOfBlocks( size, cudaBlockSize.x ); + const IndexType cudaGrids = Cuda::getNumberOfGrids( cudaBlocks ); + this->cudaBlockResidue.setSize( min( cudaBlocks, Cuda::getMaxGridSize() ) ); + const IndexType threadsPerGrid = Cuda::getMaxGridSize() * cudaBlockSize.x; localResidue = 0.0; 
for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) diff --git a/src/Benchmarks/ODESolvers/SimpleProblem.h b/src/Benchmarks/ODESolvers/SimpleProblem.h index 6323264b8e1dc88bedf1076fbeb05b8cb80adee6..ff81fd18e4576672a89f35f54ff37eeed4ba9d86 100644 --- a/src/Benchmarks/ODESolvers/SimpleProblem.h +++ b/src/Benchmarks/ODESolvers/SimpleProblem.h @@ -13,7 +13,7 @@ #pragma once #include <TNL/Devices/Host.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Benchmarks { @@ -43,7 +43,7 @@ struct SimpleProblem { fu[ i ] = 1.0; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, u.getSize(), computeF, u, fu ); } template< typename Vector > diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark.h b/src/Benchmarks/Traversers/GridTraversersBenchmark.h index 72ca102bcc131067eec286390e819be91db22e04..01590f1221f7a451270234044e180a16ff589e02 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h index 8ec5cdf888b35185becbcda3841cc2cd46a9a176..9820af39274cd5c8db310b583c0595d2b64bf252 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_1D.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h index 
3c2037f40b7e33da59d1af2a3b0552d49d06ebb2..0e9ae7f2f36a9d19a913a4437d988d5172592e5b 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_2D.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> diff --git a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h index 9dfeadb056461623f12b51992c3efee9a8c8767e..26b6413e43edbc86a3a02490580afc834bb0bde6 100644 --- a/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h +++ b/src/Benchmarks/Traversers/GridTraversersBenchmark_3D.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Vector.h> diff --git a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h index 63b3cc8c94a58b1616cb77c4fa43348bc53e93e8..dbe637d826fe4e0dcd593320fba11ad46588e9b3 100644 --- a/src/Benchmarks/Traversers/tnl-benchmark-traversers.h +++ b/src/Benchmarks/Traversers/tnl-benchmark-traversers.h @@ -19,7 +19,7 @@ #include <TNL/Config/ConfigDescription.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/List.h> using namespace TNL; diff --git a/src/Examples/CMakeLists.txt b/src/Examples/CMakeLists.txt index 4038095719828aed5da45f9b62da30ac120d74cd..493f537d11ef6b6c54f42d476aca4d08cedf17cb 100644 --- a/src/Examples/CMakeLists.txt +++ b/src/Examples/CMakeLists.txt @@ -12,6 +12,5 @@ add_subdirectory( flow-vl ) ADD_EXECUTABLE( ConfigDescriptionExample ConfigDescriptionExample.cpp ) -ADD_EXECUTABLE( ListExample ListExample.cpp ) ADD_EXECUTABLE( LoggerExample LoggerExample.cpp ) ADD_EXECUTABLE( 
MathExample MathExample.cpp ) diff --git a/src/Examples/ListExample.cpp b/src/Examples/ListExample.cpp deleted file mode 100644 index 7196dc7594689bca4395976f43b23c8b12d42eb5..0000000000000000000000000000000000000000 --- a/src/Examples/ListExample.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include <iostream> -#include <TNL/Config/ConfigDescription.h> -#include <TNL/Containers/List.h> -#include <TNL/Containers/Array.h> - -using namespace TNL; -using namespace std; - -int main() -{ - Containers::List< int > lst; - lst.isEmpty(); - - lst.Append(1); - lst.Append(3); - - lst.isEmpty(); - lst.getSize(); - - lst.Insert(2,1); - - Containers::Array<int> array; - lst.toArray(array); -} \ No newline at end of file diff --git a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h index 6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644 --- a/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; 
- typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h index 18eaff1101eccc7733eb5978b48807be82bba916..008a68bef0b7a81519f95c184163b6a8f1752a07 100644 --- a/src/Examples/flow-sw/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > 
CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644 --- a/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef 
EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h index 3b49cd56e5ab6901716c86115561c26fbbbff973..0730c9ee227b0ed57ad6f389c3f957db0b8d0872 100644 --- a/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/EnergyBoundaryConditionCavity.h @@ 
-113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git 
a/src/Examples/flow-sw/LaxFridrichsContinuity.h b/src/Examples/flow-sw/LaxFridrichsContinuity.h index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644 --- a/src/Examples/flow-sw/LaxFridrichsContinuity.h +++ b/src/Examples/flow-sw/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow-sw/LaxFridrichsEnergy.h b/src/Examples/flow-sw/LaxFridrichsEnergy.h index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644 --- a/src/Examples/flow-sw/LaxFridrichsEnergy.h +++ b/src/Examples/flow-sw/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumX.h b/src/Examples/flow-sw/LaxFridrichsMomentumX.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumX.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename 
MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumY.h b/src/Examples/flow-sw/LaxFridrichsMomentumY.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumY.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String 
getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- a/src/Examples/flow-sw/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow-sw/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class 
LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644 --- a/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > 
CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644 --- a/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; 
typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644 --- a/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > 
CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644 --- a/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; 
typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h index 
9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644 --- a/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow-sw/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef 
CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644 --- a/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h +++ b/src/Examples/flow-sw/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< 
RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-sw/UpwindContinuity.h b/src/Examples/flow-sw/UpwindContinuity.h index fc599d3d9773c39752b72abcee6626150ea70c7a..d016cff6b5324de887e8eb80750343a357c69d6b 100644 --- a/src/Examples/flow-sw/UpwindContinuity.h +++ b/src/Examples/flow-sw/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/UpwindEnergy.h b/src/Examples/flow-sw/UpwindEnergy.h index 6c7e94ec87ce2e3fbd96e6affaeb91b1242d9246..8023631ba014e552d4a8353c1e9d44e27973c382 100644 --- a/src/Examples/flow-sw/UpwindEnergy.h +++ b/src/Examples/flow-sw/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-sw/UpwindMomentumX.h b/src/Examples/flow-sw/UpwindMomentumX.h index edd3756208121de465185a84693a10671e587bac..939e4f555587adb7b5e5e79b81c95e7601e393de 
100644 --- a/src/Examples/flow-sw/UpwindMomentumX.h +++ b/src/Examples/flow-sw/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/UpwindMomentumY.h b/src/Examples/flow-sw/UpwindMomentumY.h index 4b5a7bcb26d049c2773790857d3f79246488b55b..7a4d3d050654d4db86a00e1e00e2e882088285fe 100644 --- a/src/Examples/flow-sw/UpwindMomentumY.h +++ b/src/Examples/flow-sw/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using 
BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/UpwindMomentumZ.h b/src/Examples/flow-sw/UpwindMomentumZ.h index 887eec977517e5850db2085835d8242d63605c96..c425887578dfd313d18ed8567d86044ef4f568dc 100644 --- a/src/Examples/flow-sw/UpwindMomentumZ.h +++ b/src/Examples/flow-sw/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class 
UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-sw/navierStokesProblem.h b/src/Examples/flow-sw/navierStokesProblem.h index 0252a5c46dc392566cde6ad8d454d341d8331b05..0e79d19df771363816f60297d75e5c15da8938af 100644 --- a/src/Examples/flow-sw/navierStokesProblem.h +++ b/src/Examples/flow-sw/navierStokesProblem.h @@ -55,8 +55,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer; typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - - static String getTypeStatic(); String getPrologHeader() const; diff --git a/src/Examples/flow-sw/navierStokesProblem_impl.h b/src/Examples/flow-sw/navierStokesProblem_impl.h index 886c9f03f4e981cd9533d72ba5f71809388c6438..96bdb48279af245451dd7c140c1675b71df87b05 100644 --- a/src/Examples/flow-sw/navierStokesProblem_impl.h +++ b/src/Examples/flow-sw/navierStokesProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename 
Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h index 6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644 --- a/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class 
DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h index 18eaff1101eccc7733eb5978b48807be82bba916..008a68bef0b7a81519f95c184163b6a8f1752a07 100644 --- a/src/Examples/flow-vl/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > 
ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644 --- a/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< 
Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h index 3b49cd56e5ab6901716c86115561c26fbbbff973..0730c9ee227b0ed57ad6f389c3f957db0b8d0872 100644 --- a/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/EnergyBoundaryConditionCavity.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef 
EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/LaxFridrichsContinuity.h b/src/Examples/flow-vl/LaxFridrichsContinuity.h index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644 --- a/src/Examples/flow-vl/LaxFridrichsContinuity.h +++ b/src/Examples/flow-vl/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - 
static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow-vl/LaxFridrichsEnergy.h b/src/Examples/flow-vl/LaxFridrichsEnergy.h index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644 --- a/src/Examples/flow-vl/LaxFridrichsEnergy.h +++ b/src/Examples/flow-vl/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumX.h b/src/Examples/flow-vl/LaxFridrichsMomentumX.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumX.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index 
>() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumY.h b/src/Examples/flow-vl/LaxFridrichsMomentumY.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumY.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename 
BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- a/src/Examples/flow-vl/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow-vl/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename 
MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644 --- a/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename 
MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644 --- a/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< 
CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644 --- a/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef 
typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644 --- a/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef 
Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h index 9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644 --- a/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow-vl/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType 
> PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644 --- 
a/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h +++ b/src/Examples/flow-vl/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef 
Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow-vl/UpwindContinuity.h b/src/Examples/flow-vl/UpwindContinuity.h index 20bae4fbb49fe4d1510f95f0ef4c2404873903f7..fff04e9bb250bffb9c4021e801bb506486e038b5 100644 --- a/src/Examples/flow-vl/UpwindContinuity.h +++ b/src/Examples/flow-vl/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/UpwindEnergy.h b/src/Examples/flow-vl/UpwindEnergy.h index 8fa7a046a24d37c832746d6f17396e39582f7f5e..b4570e60829f5fa02f8d059123297f1a38ad7f53 100644 --- a/src/Examples/flow-vl/UpwindEnergy.h +++ b/src/Examples/flow-vl/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow-vl/UpwindMomentumX.h b/src/Examples/flow-vl/UpwindMomentumX.h index edd3756208121de465185a84693a10671e587bac..939e4f555587adb7b5e5e79b81c95e7601e393de 100644 --- a/src/Examples/flow-vl/UpwindMomentumX.h +++ b/src/Examples/flow-vl/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - 
TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -136,14 +127,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -268,14 +251,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/UpwindMomentumY.h b/src/Examples/flow-vl/UpwindMomentumY.h index 4b5a7bcb26d049c2773790857d3f79246488b55b..7a4d3d050654d4db86a00e1e00e2e882088285fe 100644 --- a/src/Examples/flow-vl/UpwindMomentumY.h +++ b/src/Examples/flow-vl/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename 
BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -238,14 +221,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/UpwindMomentumZ.h b/src/Examples/flow-vl/UpwindMomentumZ.h index 887eec977517e5850db2085835d8242d63605c96..c425887578dfd313d18ed8567d86044ef4f568dc 100644 --- a/src/Examples/flow-vl/UpwindMomentumZ.h +++ b/src/Examples/flow-vl/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ 
-169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow-vl/navierStokesProblem.h b/src/Examples/flow-vl/navierStokesProblem.h index 51cc5f014f40fb51f45782c5332baf868487fa2e..dbac46e749a0eda0e34a926ba5d1af3a964e820c 100644 --- a/src/Examples/flow-vl/navierStokesProblem.h +++ b/src/Examples/flow-vl/navierStokesProblem.h @@ -57,8 +57,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/flow-vl/navierStokesProblem_impl.h b/src/Examples/flow-vl/navierStokesProblem_impl.h index 886c9f03f4e981cd9533d72ba5f71809388c6438..96bdb48279af245451dd7c140c1675b71df87b05 100644 --- a/src/Examples/flow-vl/navierStokesProblem_impl.h +++ b/src/Examples/flow-vl/navierStokesProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/flow/DensityBoundaryConditionBoiler.h b/src/Examples/flow/DensityBoundaryConditionBoiler.h index 
6231f6780e24e3090c83832c9d47534a9c6104a8..e02f1b1e403c25802c9e8792daa972bec1faf071 100644 --- a/src/Examples/flow/DensityBoundaryConditionBoiler.h +++ b/src/Examples/flow/DensityBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -384,7 +382,6 @@ class DensityBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsBoilerBase< Function > BaseType; typedef 
CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/DensityBoundaryConditionCavity.h b/src/Examples/flow/DensityBoundaryConditionCavity.h index c753d324a288a331dbfb9d73e341fe962f39889e..7611f682cd9f14e05c6c37f5e4b10a3743444373 100644 --- a/src/Examples/flow/DensityBoundaryConditionCavity.h +++ b/src/Examples/flow/DensityBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -234,7 +233,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -381,7 +379,6 @@ class DensityBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshIn typedef Containers::Vector< RealType, DeviceType, 
IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef DensityBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef DensityBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/EnergyBoundaryConditionBoiler.h b/src/Examples/flow/EnergyBoundaryConditionBoiler.h index a99fdf0157bfcbca614374e8472ab9fe8a3b4f58..0090bc2452288f7af0b18fa7ce675cd252f4e423 100644 --- a/src/Examples/flow/EnergyBoundaryConditionBoiler.h +++ b/src/Examples/flow/EnergyBoundaryConditionBoiler.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > 
CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -532,7 +530,6 @@ class EnergyBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/EnergyBoundaryConditionCavity.h b/src/Examples/flow/EnergyBoundaryConditionCavity.h index 60e55f4240ed1fcb8bc63e494c01faf61f899568..0ba8c80aae9c95ce3fef4cd3fb13a16fe15b7774 100644 --- a/src/Examples/flow/EnergyBoundaryConditionCavity.h +++ b/src/Examples/flow/EnergyBoundaryConditionCavity.h @@ -113,7 +113,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -252,7 +251,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 
2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -476,7 +474,6 @@ class EnergyBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, MeshInd typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef EnergyBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef EnergyBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/LaxFridrichsContinuity.h b/src/Examples/flow/LaxFridrichsContinuity.h index bf3cc45ece7877291a53cb460dd874fa77bbd250..8a9d22c6a3aaf9d38713f0e0f91d3a859476e9c1 100644 --- a/src/Examples/flow/LaxFridrichsContinuity.h +++ b/src/Examples/flow/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow/LaxFridrichsContinuityEuler.h b/src/Examples/flow/LaxFridrichsContinuityEuler.h index f444a4e2541e76addb5c7a3eba87cf1d946ee4fa..ce175d8071028227427e603190862768394a96fa 100644 --- a/src/Examples/flow/LaxFridrichsContinuityEuler.h +++ 
b/src/Examples/flow/LaxFridrichsContinuityEuler.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/flow/LaxFridrichsEnergy.h b/src/Examples/flow/LaxFridrichsEnergy.h index dd940243d7fbaa59ae66d013451cd24c2def8488..630a985fe4c2b2b3637ef8ece3579e13ef25bd2e 100644 --- a/src/Examples/flow/LaxFridrichsEnergy.h +++ b/src/Examples/flow/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow/LaxFridrichsEnergyEuler.h b/src/Examples/flow/LaxFridrichsEnergyEuler.h index 30180639d18c4d36b688eb60d597a571e5647115..37cd793a7be8fa94a13d2979a390470c779940c2 100644 --- a/src/Examples/flow/LaxFridrichsEnergyEuler.h +++ b/src/Examples/flow/LaxFridrichsEnergyEuler.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/flow/LaxFridrichsMomentumX.h b/src/Examples/flow/LaxFridrichsMomentumX.h index 3e295c029f9bc4ae61dfc54650be4c4aff55cf18..8fe02be4033c9bd259d22133bff7175a653cd878 100644 --- a/src/Examples/flow/LaxFridrichsMomentumX.h +++ b/src/Examples/flow/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, 
MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -128,14 +119,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -242,14 +225,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumXEuler.h b/src/Examples/flow/LaxFridrichsMomentumXEuler.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/flow/LaxFridrichsMomentumXEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumXEuler.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - 
TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumY.h b/src/Examples/flow/LaxFridrichsMomentumY.h index 0df12c5227981b42b64437a4be96a511cf1b5991..61c3e09dc6a12f91241aa6308c61918c8cce5900 100644 --- a/src/Examples/flow/LaxFridrichsMomentumY.h +++ b/src/Examples/flow/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename 
BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -222,14 +205,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumYEuler.h b/src/Examples/flow/LaxFridrichsMomentumYEuler.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/flow/LaxFridrichsMomentumYEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumYEuler.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - 
TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumZ.h b/src/Examples/flow/LaxFridrichsMomentumZ.h index e4f8501ec1f3c44f1a39fb2a5aa85de5209f9635..37056b5ae2e293781ba68270cac559fad8f4b06b 100644 --- a/src/Examples/flow/LaxFridrichsMomentumZ.h +++ b/src/Examples/flow/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename 
BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/LaxFridrichsMomentumZEuler.h b/src/Examples/flow/LaxFridrichsMomentumZEuler.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- a/src/Examples/flow/LaxFridrichsMomentumZEuler.h +++ b/src/Examples/flow/LaxFridrichsMomentumZEuler.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename 
MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h index dfe63e07623a6fdb6ca7ebb7da8ab445d9505372..6a921539c13c4b0958f01ffab8b357435c43c969 100644 --- a/src/Examples/flow/MomentumXBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumXBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -418,7 +416,6 @@ class MomentumXBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType 
CoordinatesType; - typedef MomentumXBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumXBoundaryConditionCavity.h b/src/Examples/flow/MomentumXBoundaryConditionCavity.h index 07abfdbeb940039555ac2799d0ef374ca26faff0..f27dda7f46c5766a848c94664e652b002c8dbe51 100644 --- a/src/Examples/flow/MomentumXBoundaryConditionCavity.h +++ b/src/Examples/flow/MomentumXBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > 
CompressibleConservativeVariablesPointer; @@ -407,7 +405,6 @@ class MomentumXBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumXBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumXBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h index 83b6282ddd50033f32a382f0b48f5abe7347ccaa..2a8e06f2f611d11f545665b2a53f6b8f7a3f3cae 100644 --- a/src/Examples/flow/MomentumYBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumYBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - 
typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -413,7 +411,6 @@ class MomentumYBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumYBoundaryConditionCavity.h b/src/Examples/flow/MomentumYBoundaryConditionCavity.h index a83dd653f92328814b8d0746bc45c8775552a310..35c01409cb3188d02ee617a313267fbdd88b85ac 100644 --- a/src/Examples/flow/MomentumYBoundaryConditionCavity.h +++ b/src/Examples/flow/MomentumYBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > 
CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -404,7 +402,6 @@ class MomentumYBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumYBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumYBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h index 9d887857ce97f916dcfaccd0208138afc200afd1..cf790d77d0e2090cce60dc3309094ca5122b0318 100644 --- a/src/Examples/flow/MomentumZBoundaryConditionBoiler.h +++ b/src/Examples/flow/MomentumZBoundaryConditionBoiler.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - 
typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsBoiler< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsBoiler< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsBoilerBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/MomentumZBoundaryConditionCavity.h b/src/Examples/flow/MomentumZBoundaryConditionCavity.h index 5fe6f22e5945513c9e9e86d835256ef84e27c054..a771ab84f84d2c5cf22b3d57d7ee9f2a91ef82bf 100644 --- a/src/Examples/flow/MomentumZBoundaryConditionCavity.h +++ 
b/src/Examples/flow/MomentumZBoundaryConditionCavity.h @@ -111,7 +111,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 1, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 1, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -242,7 +241,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 2, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 2, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > CompressibleConservativeVariablesPointer; @@ -394,7 +392,6 @@ class MomentumZBoundaryConditionsCavity< Meshes::Grid< 3, MeshReal, Device, Mesh typedef Containers::Vector< RealType, DeviceType, IndexType> DofVectorType; typedef Containers::StaticVector< 3, RealType > PointType; typedef typename MeshType::CoordinatesType CoordinatesType; - typedef MomentumZBoundaryConditionsCavity< MeshType, Function, Real, Index > ThisType; typedef MomentumZBoundaryConditionsCavityBase< Function > BaseType; typedef CompressibleConservativeVariables< MeshType > CompressibleConservativeVariablesType; typedef Pointers::SharedPointer< CompressibleConservativeVariablesType > 
CompressibleConservativeVariablesPointer; diff --git a/src/Examples/flow/navierStokesProblem.h b/src/Examples/flow/navierStokesProblem.h index f42c2ed09908b4c73cfab906f9fcecfe5101201d..71e8243cdac7084aab484f81be18acbc33baad0b 100644 --- a/src/Examples/flow/navierStokesProblem.h +++ b/src/Examples/flow/navierStokesProblem.h @@ -57,8 +57,6 @@ class navierStokesProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/flow/navierStokesProblem_impl.h b/src/Examples/flow/navierStokesProblem_impl.h index 4b0c7977441e87cab05fccab2c3984705670cfd4..c2c84e7a6da2ad965c3c9afd454560d56e12dfa4 100644 --- a/src/Examples/flow/navierStokesProblem_impl.h +++ b/src/Examples/flow/navierStokesProblem_impl.h @@ -42,18 +42,6 @@ */ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -navierStokesProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "navierStokesProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/heat-equation/CMakeLists.txt b/src/Examples/heat-equation/CMakeLists.txt index c895199060481cd4e20e22c30a4a5b473faeb552..979c34076cd5588921cfaea29e10d4ef712f7a79 100644 --- a/src/Examples/heat-equation/CMakeLists.txt +++ b/src/Examples/heat-equation/CMakeLists.txt @@ -1,18 +1,17 @@ -set( tnl_heat_equation_SOURCES +set( tnl_heat_equation_SOURCES tnl-heat-equation.cpp tnl-heat-equation-eoc.cpp tnl-heat-equation.cu tnl-heat-equation-eoc.cu ) - + IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cu) CUDA_ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cu) 
target_link_libraries (tnl-heat-equation ${CUSPARSE_LIBRARY} ) target_link_libraries (tnl-heat-equation-eoc-test ${CUSPARSE_LIBRARY} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp) - ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp) - TARGET_COMPILE_DEFINITIONS( tnl-heat-equation PUBLIC ${MIC_CXX_FLAGS} ) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp) + ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp) ENDIF( BUILD_CUDA ) @@ -20,7 +19,7 @@ INSTALL( TARGETS tnl-heat-equation tnl-heat-equation-eoc-test RUNTIME DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - + INSTALL( FILES tnl-run-heat-equation-eoc-test tnl-run-heat-equation ${tnl_heat_equation_SOURCES} diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( 
"LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git 
a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- 
a/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow-sw/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindContinuity.h b/src/Examples/inviscid-flow-sw/UpwindContinuity.h index 22fc4ffc5d97e2933d7db36ceac3f66d5a33b63f..6a763635cb2ec8d08c2bd8549b89356bb4bed8be 100644 --- a/src/Examples/inviscid-flow-sw/UpwindContinuity.h +++ b/src/Examples/inviscid-flow-sw/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef 
Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/UpwindEnergy.h b/src/Examples/inviscid-flow-sw/UpwindEnergy.h index 39f6090064075b0dd688e6105ffac94c14421cde..7472790dbd8d7d65618a724d68e0f82d5948815e 100644 --- a/src/Examples/inviscid-flow-sw/UpwindEnergy.h +++ b/src/Examples/inviscid-flow-sw/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h index ed49dda94585e64f85d820569a757d849757e6ca..1a887e7a51961c37897e9df73c4dbf2c9838156d 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumX.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumX.h @@ -46,15 +46,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - 
MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -236,14 +219,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h index c2126d43af781289f86999a5f4a7f8d24ad5c6e8..2ab8ffe82aecfff8e97391a8ed2dcd8385741648 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumY.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde 
using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h index 97339e804b3bda5203d0b12feeb59e30249f2327..fe8be0eb20cc14fd491cc92d4df44e6dc737acb4 100644 --- a/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h +++ b/src/Examples/inviscid-flow-sw/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< 
Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-sw/eulerProblem.h b/src/Examples/inviscid-flow-sw/eulerProblem.h index a91e56176dbe8e5b507a61a8d27aaa5050855693..5c10ab7fb11417d637c89988ef29fde31ecd97f5 100644 --- a/src/Examples/inviscid-flow-sw/eulerProblem.h +++ b/src/Examples/inviscid-flow-sw/eulerProblem.h @@ -57,8 +57,6 @@ class eulerProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h index e0382e9c2485bbec5740df99af47b87a28122139..d4f119d4c962bc6a2047e6401347fd8cda16c552 100644 --- a/src/Examples/inviscid-flow-sw/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow-sw/eulerProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h index 82747cd18220efc01bc2d68e0247c01723c29fd0..4195913b656162a58fec8688adbc80ff8107cb88 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - 
TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h index 03019ed23c85f82ee489c95d8173c0f100cff3c8..df7828be3132d0431b3e2bae230ac090f126a6d4 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename 
MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename 
BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- a/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow-vl/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " 
>"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindContinuity.h b/src/Examples/inviscid-flow-vl/UpwindContinuity.h index 4a21cd502b3f54f898a25ef85be84dcf52a52f5d..3d60dfd9fc877e641cb10f0b7bc018c312254be3 100644 --- a/src/Examples/inviscid-flow-vl/UpwindContinuity.h +++ b/src/Examples/inviscid-flow-vl/UpwindContinuity.h @@ -37,14 +37,6 @@ class UpwindContinuityBase typedef Pointers::SharedPointer< MeshFunctionType > MeshFunctionPointer; typedef Pointers::SharedPointer< VelocityFieldType > VelocityFieldPointer; - static String getType() - { - return String( "UpwindContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/UpwindEnergy.h b/src/Examples/inviscid-flow-vl/UpwindEnergy.h index e3857cbcdf2e29110d01e3f6140a1f7da6b9f0b6..ce26148d7acf53571770dfa997f2fd251265ed9b 100644 --- a/src/Examples/inviscid-flow-vl/UpwindEnergy.h +++ b/src/Examples/inviscid-flow-vl/UpwindEnergy.h @@ -36,14 +36,6 @@ class UpwindEnergyBase UpwindEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "UpwindEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h index ed49dda94585e64f85d820569a757d849757e6ca..cc7a01bc9662ea69b897de453083eade365a963c 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumX.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumX.h @@ -47,15 +47,6 @@ class UpwindMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - static String getType() - { - return String( "UpwindMomentumX< " ) + - 
MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -130,14 +121,6 @@ class UpwindMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -236,14 +219,6 @@ class UpwindMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h index c2126d43af781289f86999a5f4a7f8d24ad5c6e8..2ab8ffe82aecfff8e97391a8ed2dcd8385741648 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumY.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumY.h @@ -46,15 +46,6 @@ class UpwindMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumY< Meshes::Grid< 
2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -212,14 +195,6 @@ class UpwindMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h index 97339e804b3bda5203d0b12feeb59e30249f2327..fe8be0eb20cc14fd491cc92d4df44e6dc737acb4 100644 --- a/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h +++ b/src/Examples/inviscid-flow-vl/UpwindMomentumZ.h @@ -46,15 +46,6 @@ class UpwindMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class UpwindMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - 
TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class UpwindMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, Inde using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "UpwindMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow-vl/eulerProblem.h b/src/Examples/inviscid-flow-vl/eulerProblem.h index a91e56176dbe8e5b507a61a8d27aaa5050855693..5c10ab7fb11417d637c89988ef29fde31ecd97f5 100644 --- a/src/Examples/inviscid-flow-vl/eulerProblem.h +++ b/src/Examples/inviscid-flow-vl/eulerProblem.h @@ -57,8 +57,6 @@ class eulerProblem: typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; using CommunicatorType = Communicator; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h index e0382e9c2485bbec5740df99af47b87a28122139..d4f119d4c962bc6a2047e6401347fd8cda16c552 100644 --- a/src/Examples/inviscid-flow-vl/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow-vl/eulerProblem_impl.h @@ -30,18 +30,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename InviscidOperators, - typename Communicator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, InviscidOperators, Communicator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git 
a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h index 2e79798a31a7073241903891d0317502c8494a60..f87c9103372e55dec8597695146e43e316ee91a8 100644 --- a/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h +++ b/src/Examples/inviscid-flow/2d/Euler2DVelXGetter.h @@ -21,8 +21,6 @@ class EulerVelXGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerVelXGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVel) : rho( rho ), rhoVel( rhoVel ) diff --git a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h index 2e79798a31a7073241903891d0317502c8494a60..f87c9103372e55dec8597695146e43e316ee91a8 100644 --- a/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h +++ b/src/Examples/inviscid-flow/3d/Euler2DVelXGetter.h @@ -21,8 +21,6 @@ class EulerVelXGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerVelXGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVel) : rho( rho ), rhoVel( rhoVel ) diff --git a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h index 45611c64754aa161274bdabe95cd0c60565ef2c1..5a39ca84dd28965165d4c6890acb5e24cc3cf9f2 100644 --- a/src/Examples/inviscid-flow/3d/EulerPressureGetter.h +++ b/src/Examples/inviscid-flow/3d/EulerPressureGetter.h @@ -22,8 +22,6 @@ class EulerPressureGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerPressureGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVelX, const MeshFunctionType& rhoVelY, diff --git a/src/Examples/inviscid-flow/3d/EulerVelGetter.h b/src/Examples/inviscid-flow/3d/EulerVelGetter.h index 
24d06eaf5f0ce322c17086ea2cf04495c96bd3e7..82441fcaed012d17181a55c7c26a27457bbc7d58 100644 --- a/src/Examples/inviscid-flow/3d/EulerVelGetter.h +++ b/src/Examples/inviscid-flow/3d/EulerVelGetter.h @@ -21,8 +21,6 @@ class EulerVelGetter typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); - EulerVelGetter( const MeshFunctionType& rho, const MeshFunctionType& rhoVelX, const MeshFunctionType& rhoVelY, diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h index ac469a52407477aab2d8a60c3eca92e5d168a253..840fc26803197babe5f5b652f4cf1e7d2f9233ec 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsContinuity_impl .h @@ -6,21 +6,6 @@ namespace TNL { /**** * 1D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -109,21 +94,6 @@ updateLinearSystem( const RealType& time, /**** * 2D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -224,21 +194,6 @@ updateLinearSystem( const RealType& time, /**** * 3D problem */ -template< typename MeshReal, - typename 
Device, - typename MeshIndex, - typename Real, - typename Index > -String -LaxFridrichsContinuity< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h index 9083970ae4bb036b2be46e8556dab2e4f8eb2607..9756f46c86fafd204cbe47da573f0aee5cae6a9a 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsEnergy.h @@ -29,7 +29,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class LaxFridrichsEnergy< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real, I typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h index 5d1cf919252d85f00f08657e7e2944402940615a..33e9c33ac2e6fe1fc866ffa556cf17d281e5a61b 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumX.h @@ -29,7 
+29,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h index 0d7882f6deaf9d5009775ffa2c9a1ca57d4261db..63be3651052960ff2f786d1c28983fe16f3d6032 100644 --- a/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow/3d/LaxFridrichsMomentumZ.h @@ -29,7 +29,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -101,7 +100,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real typedef Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; @@ -173,7 +171,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real typedef 
Functions::MeshFunction< MeshType > MeshFunctionType; enum { Dimensions = MeshType::getMeshDimensions() }; - static String getType(); Real tau; MeshFunctionType velocityX; MeshFunctionType velocityY; diff --git a/src/Examples/inviscid-flow/3d/eulerProblem.h b/src/Examples/inviscid-flow/3d/eulerProblem.h index d2ff1fc109c6d548ac503d82e398aa7c5b19c066..429c9d94848b109d18b549e3ad4a7ec0da3a9895 100644 --- a/src/Examples/inviscid-flow/3d/eulerProblem.h +++ b/src/Examples/inviscid-flow/3d/eulerProblem.h @@ -45,8 +45,6 @@ class eulerProblem: typedef typename DifferentialOperator::VelocityX VelocityX; typedef typename DifferentialOperator::Pressure Pressure; - static String getTypeStatic(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h index b01979b87c6218f6e0ff1018caa8f05467bdfd1c..10953ebe2c68e2e32a57364e01206f884a12d9d4 100644 --- a/src/Examples/inviscid-flow/3d/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow/3d/eulerProblem_impl.h @@ -14,17 +14,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getTypeStatic() -{ - return String( "eulerProblem< " ) + Mesh :: getTypeStatic() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h index 0ae10b4f9399fc41c97d5ad35fba4748f98bef95..93e52f04ef8d8095b459521f8f8627afd19dbf16 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsContinuity.h +++ b/src/Examples/inviscid-flow/LaxFridrichsContinuity.h @@ -38,14 +38,6 @@ class LaxFridrichsContinuityBase LaxFridrichsContinuityBase() : artificialViscosity( 1.0 ){}; - - static String getType() - { - return String( 
"LaxFridrichsContinuity< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } void setTau(const Real& tau) { diff --git a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h index 8c6791cd17516d877a206adff346900fc80d2462..a9bb4148ce25c3ef9618680a7fea29a094e32c81 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsEnergy.h +++ b/src/Examples/inviscid-flow/LaxFridrichsEnergy.h @@ -36,14 +36,6 @@ class LaxFridrichsEnergyBase LaxFridrichsEnergyBase() : artificialViscosity( 1.0 ){}; - static String getType() - { - return String( "LaxFridrichsEnergy< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setTau(const Real& tau) { this->tau = tau; diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h index 63def12d315188b82e82402635fca863d1b9a629..b1877a2c4bf5837e08e099b0b3ba30b7b502be4d 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumX.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -119,14 +110,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< 
Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -200,14 +183,6 @@ class LaxFridrichsMomentumX< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumX< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h index 8ce42282dd4c74d5ed72d2abbd661235b95dc160..b4fe7569105bb72f53be3279323269fb841512b9 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumY.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -189,14 +172,6 @@ class LaxFridrichsMomentumY< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; 
using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumY< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h index a67e862ceffd78d4fd770d7b1a07e9f05af349d8..fe853959515c01e9b922cf866fb0b71388326175 100644 --- a/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h +++ b/src/Examples/inviscid-flow/LaxFridrichsMomentumZ.h @@ -46,15 +46,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -108,14 +99,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Rea using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ @@ -169,14 +152,6 @@ class LaxFridrichsMomentumZ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, Real using typename BaseType::VelocityFieldType; using typename BaseType::VelocityFieldPointer; using BaseType::Dimensions; - - static String getType() - { - return String( "LaxFridrichsMomentumZ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() 
+ " >"; - } template< typename MeshFunction, typename MeshEntity > __cuda_callable__ diff --git a/src/Examples/inviscid-flow/eulerProblem.h b/src/Examples/inviscid-flow/eulerProblem.h index a854f8098e751d65d9f1e542c540d653b1fb08c1..dfc7be55908efc552013e92e753a3beafe887d46 100644 --- a/src/Examples/inviscid-flow/eulerProblem.h +++ b/src/Examples/inviscid-flow/eulerProblem.h @@ -56,8 +56,6 @@ class eulerProblem: typedef Pointers::SharedPointer< BoundaryCondition > BoundaryConditionPointer; typedef Pointers::SharedPointer< RightHandSide, DeviceType > RightHandSidePointer; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/inviscid-flow/eulerProblem_impl.h b/src/Examples/inviscid-flow/eulerProblem_impl.h index fd64ae2846e347bdadd878c0a3f4052fdbefc522..d203a16ad8ae1b25995d1f446856f328558b777e 100644 --- a/src/Examples/inviscid-flow/eulerProblem_impl.h +++ b/src/Examples/inviscid-flow/eulerProblem_impl.h @@ -31,18 +31,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename InviscidOperators > -String -eulerProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, InviscidOperators >:: -getType() -{ - return String( "eulerProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/navier-stokes/navierStokesSetter_impl.h b/src/Examples/navier-stokes/navierStokesSetter_impl.h index 5109284a640553857249865b1232ae007a7ec314..a0369516cf8d4c7120ccef8c6837e380064f97ae 100644 --- a/src/Examples/navier-stokes/navierStokesSetter_impl.h +++ b/src/Examples/navier-stokes/navierStokesSetter_impl.h @@ -29,7 +29,7 @@ template< typename MeshType, typename SolverStarter > typename IndexType > bool navierStokesSetter< MeshType, SolverStarter > :: run( const Config::ParameterContainer& parameters ) { - std::cerr << "The solver is not implemented for 
the mesh " << MeshType::getType() << "." << std::endl; + std::cerr << "The solver is not implemented for the mesh " << getType< MeshType >() << "." << std::endl; return false; } diff --git a/src/Examples/navier-stokes/navierStokesSolver.h b/src/Examples/navier-stokes/navierStokesSolver.h index 262d9d4802752de0d53235bb0a9ad6afd8463a06..c0166701abb96fa6cb76d016a8d26a821bab0769 100644 --- a/src/Examples/navier-stokes/navierStokesSolver.h +++ b/src/Examples/navier-stokes/navierStokesSolver.h @@ -55,8 +55,6 @@ class navierStokesSolver navierStokesSolver(); - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/Examples/navier-stokes/navierStokesSolver_impl.h b/src/Examples/navier-stokes/navierStokesSolver_impl.h index d4120d38048160d1361bd56106c065cc022f43be..a42c7b3177330af85c003a3e5f9fb6b5385ce91f 100644 --- a/src/Examples/navier-stokes/navierStokesSolver_impl.h +++ b/src/Examples/navier-stokes/navierStokesSolver_impl.h @@ -285,13 +285,6 @@ SolverMonitor* return &solverMonitor; } -template< typename Mesh, typename EulerScheme > -String navierStokesSolver< Mesh, EulerScheme > :: getType() -{ - return String( "navierStokesSolver< " ) + - Mesh :: getType() + " >"; -} - template< typename Mesh, typename EulerScheme > String navierStokesSolver< Mesh, EulerScheme > :: getPrologHeader() const { diff --git a/src/Examples/simple-examples/large-meshfunction-example.h b/src/Examples/simple-examples/large-meshfunction-example.h index 2f9c70b859606f6f02b3689e9cd74e6e87edd7ae..d5520b69e1af3ce54952d49c463c9b85d345903a 100644 --- a/src/Examples/simple-examples/large-meshfunction-example.h +++ b/src/Examples/simple-examples/large-meshfunction-example.h @@ -10,7 +10,6 @@ using namespace TNL; using namespace TNL::Containers; using namespace TNL::Meshes; using namespace TNL::Functions; -using namespace TNL::Devices; int main(int argc, char ** argv) { @@ -28,9 +27,9 @@ int main(int argc, char ** argv) time.start(); #ifdef HAVE_CUDA 
- using Device=Cuda; + using Device=Devices::Cuda; #else - using Device=Host; + using Device=Devices::Host; #endif using MeshType= Grid<2, double,Device,int>; diff --git a/src/Examples/transport-equation/transportEquationProblem.h b/src/Examples/transport-equation/transportEquationProblem.h index b6aa381d5f1aca5fc004c6274a4128ac28a1c791..802100228cb067423a768d4b5507f0e8045e242d 100644 --- a/src/Examples/transport-equation/transportEquationProblem.h +++ b/src/Examples/transport-equation/transportEquationProblem.h @@ -50,8 +50,6 @@ public PDEProblem< Mesh, using typename BaseType::MeshPointer; using typename BaseType::DofVectorType; using typename BaseType::DofVectorPointer; - - static String getType(); String getPrologHeader() const; diff --git a/src/Examples/transport-equation/transportEquationProblemEoc.h b/src/Examples/transport-equation/transportEquationProblemEoc.h index 62f10e273f95938a48d28ba85230d7463c0db4ce..279af6006e4ec568e564e47aa38fd202a388c04c 100644 --- a/src/Examples/transport-equation/transportEquationProblemEoc.h +++ b/src/Examples/transport-equation/transportEquationProblemEoc.h @@ -49,8 +49,6 @@ public transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communi using typename BaseType::DofVectorPointer; //using BaseType::getExplicitUpdate; - - static String getType(); String getPrologHeader() const; diff --git a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h index 0ac3af2d8d963ea89cbb9a40837cc298c243fa40..8de7eb9bccb68630f7a53213e64f4c391c6f0489 100644 --- a/src/Examples/transport-equation/transportEquationProblemEoc_impl.h +++ b/src/Examples/transport-equation/transportEquationProblemEoc_impl.h @@ -23,18 +23,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -transportEquationProblemEoc< Mesh, BoundaryCondition, RightHandSide, 
Communicator, DifferentialOperator >:: -getType() -{ - return String( "transportEquationProblemEoc< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Examples/transport-equation/transportEquationProblem_impl.h b/src/Examples/transport-equation/transportEquationProblem_impl.h index 7d83ceb0987466f1057df129b1b51d45b163812c..96cf1a6ec263029f783c9a95e87327927d13ce71 100644 --- a/src/Examples/transport-equation/transportEquationProblem_impl.h +++ b/src/Examples/transport-equation/transportEquationProblem_impl.h @@ -21,18 +21,6 @@ namespace TNL { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -transportEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "transportEquationProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/Python/pytnl/tnl/Array.h b/src/Python/pytnl/tnl/Array.h index acebce3d22701f68b2a8beb831292fb997ed0a56..6e19878de07d6986d6a6dea1c109371b1c1b59d6 100644 --- a/src/Python/pytnl/tnl/Array.h +++ b/src/Python/pytnl/tnl/Array.h @@ -15,8 +15,6 @@ void export_Array(py::module & m, const char* name) auto array = py::class_<ArrayType>(m, name, py::buffer_protocol()) .def(py::init<>()) .def(py::init<int>()) - .def_static("getType", &ArrayType::getType) - .def("getTypeVirtual", &ArrayType::getTypeVirtual) .def_static("getSerializationType", &ArrayType::getSerializationType) .def("getSerializationTypeVirtual", &ArrayType::getSerializationTypeVirtual) .def("setSize", &ArrayType::setSize) diff --git a/src/Python/pytnl/tnl/Grid.h b/src/Python/pytnl/tnl/Grid.h index afc5b39749362a08248befb7716c8f446e888dad..8cf28a8f5bd393dfda5bfc01b6547c77ad66ba91 100644 --- a/src/Python/pytnl/tnl/Grid.h +++ b/src/Python/pytnl/tnl/Grid.h @@ -59,8 
+59,6 @@ void export_Grid( py::module & m, const char* name ) auto grid = py::class_<Grid, TNL::Object>( m, name ) .def(py::init<>()) .def_static("getMeshDimension", &Grid::getMeshDimension) - .def_static("getType", &Grid::getType) - .def("getTypeVirtual", &Grid::getTypeVirtual) .def_static("getSerializationType", &Grid::getSerializationType) .def("getSerializationTypeVirtual", &Grid::getSerializationTypeVirtual) // FIXME: number of parameters depends on the grid dimension diff --git a/src/Python/pytnl/tnl/Mesh.h b/src/Python/pytnl/tnl/Mesh.h index ee17a134804fc2fb71d69f5e81bd2838d57428e2..c0207e243ceae80613ea634b73ff429b94d697d2 100644 --- a/src/Python/pytnl/tnl/Mesh.h +++ b/src/Python/pytnl/tnl/Mesh.h @@ -112,8 +112,6 @@ void export_Mesh( py::module & m, const char* name ) auto mesh = py::class_< Mesh, TNL::Object >( m, name ) .def(py::init<>()) .def_static("getMeshDimension", &Mesh::getMeshDimension) - .def_static("getType", &Mesh::getType) - .def("getTypeVirtual", &Mesh::getTypeVirtual) .def_static("getSerializationType", &Mesh::getSerializationType) .def("getSerializationTypeVirtual", &Mesh::getSerializationTypeVirtual) .def("getEntitiesCount", &mesh_getEntitiesCount< Mesh >) diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 6788d1a68ffd738aef1ca395af9d88b3082b98bc..1a32bd257f52a14f07579abe3671df1978cfc4d2 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -56,8 +56,6 @@ void export_Matrix( py::module & m, const char* name ) auto matrix = py::class_< Matrix, TNL::Object >( m, name ) .def(py::init<>()) // overloads (defined in Object) - .def_static("getType", &Matrix::getType) - .def("getTypeVirtual", &Matrix::getTypeVirtual) .def_static("getSerializationType", &Matrix::getSerializationType) .def("getSerializationTypeVirtual", &Matrix::getSerializationTypeVirtual) .def("print", &Matrix::print) diff --git a/src/Python/pytnl/tnl/StaticVector.h 
b/src/Python/pytnl/tnl/StaticVector.h index 6b5570647fd31f7c2c8692b9c5b7322227ea7781..ba7cfcaf4f8143c246fe7afb9db8facd03d2621d 100644 --- a/src/Python/pytnl/tnl/StaticVector.h +++ b/src/Python/pytnl/tnl/StaticVector.h @@ -14,7 +14,6 @@ void export_StaticVector( Scope & scope, const char* name ) auto vector = py::class_<VectorType>(scope, name) .def(py::init< RealType >()) .def(py::init< VectorType >()) - .def_static("getType", &VectorType::getType) .def("getSize", &VectorType::getSize) // operator= .def("assign", []( VectorType& vector, const VectorType& other ) -> VectorType& { diff --git a/src/Python/pytnl/tnl/String.cpp b/src/Python/pytnl/tnl/String.cpp index f9fff7d920e49f4eedd55ad9ab5190b644171eaf..3203abda283d55ed20dc2a0eb96100bf84b65cc3 100644 --- a/src/Python/pytnl/tnl/String.cpp +++ b/src/Python/pytnl/tnl/String.cpp @@ -16,7 +16,6 @@ void export_String( py::module & m ) .def(py::init<const char*, int, int>()) .def(py::init([](int v){ return TNL::convertToString(v); })) .def(py::init([](double v){ return TNL::convertToString(v); })) - .def_static("getType", &TNL::String::getType) // __str__ (uses operator<<) // explicit namespace resolution is necessary, see http://stackoverflow.com/a/3084341/4180822 // .def(py::self_ns::str(py::self_ns::self)) diff --git a/src/Python/pytnl/tnl/Vector.h b/src/Python/pytnl/tnl/Vector.h index 9fdac4072b4db7cac83c4eb95739a92a6abe671d..475a53736a97357cc6363d3f56e07b4308caecae 100644 --- a/src/Python/pytnl/tnl/Vector.h +++ b/src/Python/pytnl/tnl/Vector.h @@ -14,8 +14,6 @@ void export_Vector(py::module & m, const char* name) py::class_<VectorType, ArrayType>(m, name) .def(py::init<>()) .def(py::init<int>()) - .def_static("getType", &VectorType::getType) - .def("getTypeVirtual", &VectorType::getTypeVirtual) .def_static("getSerializationType", &VectorType::getSerializationType) .def("getSerializationTypeVirtual", &VectorType::getSerializationTypeVirtual) .def(py::self == py::self) diff --git 
a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h b/src/TNL/Algorithms/CudaMultireductionKernel.h similarity index 93% rename from src/TNL/Containers/Algorithms/CudaMultireductionKernel.h rename to src/TNL/Algorithms/CudaMultireductionKernel.h index e67c11b419dca3ae83706d43c56f5f282aba4beb..6a078564796d42e28b2c896d8b0589acd1e2cc79 100644 --- a/src/TNL/Containers/Algorithms/CudaMultireductionKernel.h +++ b/src/TNL/Algorithms/CudaMultireductionKernel.h @@ -14,12 +14,12 @@ #include <TNL/Assert.h> #include <TNL/Math.h> -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Containers/Algorithms/CudaReductionBuffer.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/Cuda/SharedMemory.h> +#include <TNL/Algorithms/CudaReductionBuffer.h> #include <TNL/Exceptions/CudaSupportMissing.h> namespace TNL { -namespace Containers { namespace Algorithms { #ifdef HAVE_CUDA @@ -52,7 +52,7 @@ CudaMultireductionKernel( const Result zero, const int n, Result* output ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.y * blockDim.x + threadIdx.x; @@ -160,10 +160,10 @@ CudaMultireductionKernelLauncher( const Result zero, // where blocksPerMultiprocessor is determined according to the number of // available registers on the multiprocessor. // On Tesla K40c, desGridSize = 8 * 15 = 120. 
- const int activeDevice = Devices::CudaDeviceInfo::getActiveDevice(); - const int blocksdPerMultiprocessor = Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice ) + const int activeDevice = Cuda::DeviceInfo::getActiveDevice(); + const int blocksdPerMultiprocessor = Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice ) / ( Multireduction_maxThreadsPerBlock * Multireduction_registersPerThread ); - const int desGridSizeX = blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ); + const int desGridSizeX = blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ); dim3 blockSize, gridSize; // version A: max 16 rows of threads @@ -189,10 +189,10 @@ CudaMultireductionKernelLauncher( const Result zero, while( blockSize.x * blockSize.y > Multireduction_maxThreadsPerBlock ) blockSize.x /= 2; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); - gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSizeX ); + gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y ); - if( gridSize.y > (unsigned) Devices::Cuda::getMaxGridSize() ) { + if( gridSize.y > (unsigned) Cuda::getMaxGridSize() ) { std::cerr << "Maximum gridSize.y limit exceeded (limit is 65535, attempted " << gridSize.y << ")." 
<< std::endl; throw 1; } @@ -281,5 +281,4 @@ CudaMultireductionKernelLauncher( const Result zero, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h b/src/TNL/Algorithms/CudaReductionBuffer.h similarity index 96% rename from src/TNL/Containers/Algorithms/CudaReductionBuffer.h rename to src/TNL/Algorithms/CudaReductionBuffer.h index 2897c7280a6bc61f9b60a9cb3c7b44a94ad20de3..af9b3fcc254fa11384a83a353d383d9e6ddefc6b 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionBuffer.h +++ b/src/TNL/Algorithms/CudaReductionBuffer.h @@ -14,12 +14,11 @@ #include <stdlib.h> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CheckDevice.h> #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Exceptions/CudaSupportMissing.h> namespace TNL { -namespace Containers { namespace Algorithms { class CudaReductionBuffer @@ -92,5 +91,4 @@ class CudaReductionBuffer }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Algorithms/CudaReductionKernel.h similarity index 95% rename from src/TNL/Containers/Algorithms/CudaReductionKernel.h rename to src/TNL/Algorithms/CudaReductionKernel.h index 82b030e1a9198eebee91609db3c384d69e237079..b97295e0004c1f5c2ccf1ca2155bca76251b44cd 100644 --- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h +++ b/src/TNL/Algorithms/CudaReductionKernel.h @@ -14,13 +14,13 @@ #include <TNL/Assert.h> #include <TNL/Math.h> -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Containers/Algorithms/CudaReductionBuffer.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/Cuda/SharedMemory.h> +#include <TNL/Algorithms/CudaReductionBuffer.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> #include <TNL/Exceptions/CudaSupportMissing.h> namespace TNL { -namespace Containers { namespace Algorithms { /**** @@ -52,7 +52,7 @@ 
CudaReductionKernel( const Result zero, const Index size, Result* output ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); // Get the thread id (tid), global thread id (gid) and gridSize. const Index tid = threadIdx.x; @@ -147,7 +147,7 @@ CudaReductionWithArgumentKernel( const Result zero, Index* idxOutput, const Index* idxInput = nullptr ) { - Result* sdata = Devices::Cuda::getSharedMemory< Result >(); + Result* sdata = Cuda::getSharedMemory< Result >(); Index* sidx = reinterpret_cast< Index* >( &sdata[ blockDim.x ] ); // Get the thread id (tid), global thread id (gid) and gridSize. @@ -282,11 +282,11 @@ struct CudaReductionKernelLauncher // It seems to be better to map only one CUDA block per one multiprocessor or maybe // just slightly more. Therefore we omit blocksdPerMultiprocessor in the following. CudaReductionKernelLauncher( const Index size ) - : activeDevice( Devices::CudaDeviceInfo::getActiveDevice() ), - blocksdPerMultiprocessor( Devices::CudaDeviceInfo::getRegistersPerMultiprocessor( activeDevice ) + : activeDevice( Cuda::DeviceInfo::getActiveDevice() ), + blocksdPerMultiprocessor( Cuda::DeviceInfo::getRegistersPerMultiprocessor( activeDevice ) / ( Reduction_maxThreadsPerBlock * Reduction_registersPerThread ) ), - //desGridSize( blocksdPerMultiprocessor * Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), - desGridSize( Devices::CudaDeviceInfo::getCudaMultiprocessors( activeDevice ) ), + //desGridSize( blocksdPerMultiprocessor * Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ), + desGridSize( Cuda::DeviceInfo::getCudaMultiprocessors( activeDevice ) ), originalSize( size ) { } @@ -351,7 +351,7 @@ struct CudaReductionKernelLauncher // Copy result on CPU Result result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result, output, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result, output, 1 ); return result; } @@ -384,8 +384,8 
@@ struct CudaReductionKernelLauncher //// // Copy result on CPU std::pair< Index, Result > result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy( &result.second, output, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.first, idxOutput, 1 ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( &result.second, output, 1 ); return result; } @@ -402,7 +402,7 @@ struct CudaReductionKernelLauncher #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize.x = Reduction_maxThreadsPerBlock; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); // when there is only one warp per blockSize.x, we need to allocate two warps // worth of shared memory so that we don't index shared memory out of bounds @@ -473,6 +473,7 @@ struct CudaReductionKernelLauncher default: TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } + cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; */ @@ -482,6 +483,8 @@ struct CudaReductionKernelLauncher CudaReductionKernel< Reduction_maxThreadsPerBlock > <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output); + cudaStreamSynchronize(0); + TNL_CHECK_CUDA_DEVICE; } else { TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." 
<< std::endl; ); @@ -507,7 +510,7 @@ struct CudaReductionKernelLauncher #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize.x = Reduction_maxThreadsPerBlock; - gridSize.x = TNL::min( Devices::Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); + gridSize.x = TNL::min( Cuda::getNumberOfBlocks( size, blockSize.x ), desGridSize ); // when there is only one warp per blockSize.x, we need to allocate two warps // worth of shared memory so that we don't index shared memory out of bounds @@ -578,6 +581,7 @@ struct CudaReductionKernelLauncher default: TNL_ASSERT( false, std::cerr << "Block size is " << blockSize. x << " which is none of 1, 2, 4, 8, 16, 32, 64, 128, 256 or 512." ); } + cudaStreamSynchronize(0); TNL_CHECK_CUDA_DEVICE; */ @@ -587,6 +591,8 @@ struct CudaReductionKernelLauncher CudaReductionWithArgumentKernel< Reduction_maxThreadsPerBlock > <<< gridSize, blockSize, shmem >>>( zero, dataFetcher, reduction, size, output, idxOutput, idxInput ); + cudaStreamSynchronize(0); + TNL_CHECK_CUDA_DEVICE; } else { TNL_ASSERT( false, std::cerr << "Block size was expected to be " << Reduction_maxThreadsPerBlock << ", but " << blockSize.x << " was specified." 
<< std::endl; ); @@ -608,5 +614,4 @@ struct CudaReductionKernelLauncher }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h similarity index 83% rename from src/TNL/Containers/Algorithms/CudaScanKernel.h rename to src/TNL/Algorithms/CudaScanKernel.h index a8c3548757668df966b094a9da19e37b88ab7ed8..79a2019594922eee640672edb12d8ef6e9132dd0 100644 --- a/src/TNL/Containers/Algorithms/CudaScanKernel.h +++ b/src/TNL/Algorithms/CudaScanKernel.h @@ -13,12 +13,11 @@ #include <iostream> #include <TNL/Math.h> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/SharedMemory.h> #include <TNL/Exceptions/CudaBadAlloc.h> #include <TNL/Containers/Array.h> namespace TNL { -namespace Containers { namespace Algorithms { #ifdef HAVE_CUDA @@ -36,8 +35,8 @@ cudaFirstPhaseBlockScan( const ScanType scanType, Real* output, Real* auxArray ) { - Real* sharedData = TNL::Devices::Cuda::getSharedMemory< Real >(); - Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2 ]; + Real* sharedData = TNL::Cuda::getSharedMemory< Real >(); + Real* auxData = &sharedData[ elementsInBlock + elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2 ]; Real* warpSums = &auxData[ blockDim.x ]; const Index lastElementIdx = size - blockIdx.x * elementsInBlock; @@ -54,7 +53,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, sharedData[ 0 ] = zero; while( idx < elementsInBlock && blockOffset + idx < size ) { - sharedData[ Devices::Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; + sharedData[ Cuda::getInterleaving( idx + 1 ) ] = input[ blockOffset + idx ]; idx += blockDim.x; } } @@ -62,7 +61,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, { while( idx < elementsInBlock && blockOffset + idx < size ) { - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; + sharedData[ 
Cuda::getInterleaving( idx ) ] = input[ blockOffset + idx ]; idx += blockDim.x; } } @@ -78,33 +77,33 @@ cudaFirstPhaseBlockScan( const ScanType scanType, if( chunkOffset < lastElementInBlock ) { auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset ) ]; + sharedData[ Cuda::getInterleaving( chunkOffset ) ]; } int chunkPointer = 1; while( chunkPointer < chunkSize && chunkOffset + chunkPointer < lastElementInBlock ) { - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ] = - reduction( sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ], - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); + sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ] = + reduction( sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ], + sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer - 1 ) ] ); auxData[ threadIdx.x ] = - sharedData[ Devices::Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; + sharedData[ Cuda::getInterleaving( chunkOffset + chunkPointer ) ]; chunkPointer++; } /*** * Perform the parallel prefix-sum inside warps. 
*/ - const int threadInWarpIdx = threadIdx.x % Devices::Cuda::getWarpSize(); - const int warpIdx = threadIdx.x / Devices::Cuda::getWarpSize(); - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) { + const int threadInWarpIdx = threadIdx.x % Cuda::getWarpSize(); + const int warpIdx = threadIdx.x / Cuda::getWarpSize(); + for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) { if( threadInWarpIdx >= stride && threadIdx.x < numberOfChunks ) auxData[ threadIdx.x ] = reduction( auxData[ threadIdx.x ], auxData[ threadIdx.x - stride ] ); __syncwarp(); } - if( threadInWarpIdx == Devices::Cuda::getWarpSize() - 1 ) + if( threadInWarpIdx == Cuda::getWarpSize() - 1 ) warpSums[ warpIdx ] = auxData[ threadIdx.x ]; __syncthreads(); @@ -112,7 +111,7 @@ cudaFirstPhaseBlockScan( const ScanType scanType, * Compute prefix-sum of warp sums using one warp */ if( warpIdx == 0 ) - for( int stride = 1; stride < Devices::Cuda::getWarpSize(); stride *= 2 ) { + for( int stride = 1; stride < Cuda::getWarpSize(); stride *= 2 ) { if( threadInWarpIdx >= stride ) warpSums[ threadIdx.x ] = reduction( warpSums[ threadIdx.x ], warpSums[ threadIdx.x - stride ] ); __syncwarp(); @@ -136,9 +135,9 @@ cudaFirstPhaseBlockScan( const ScanType scanType, Real chunkShift( zero ); if( chunkIdx > 0 ) chunkShift = auxData[ chunkIdx - 1 ]; - sharedData[ Devices::Cuda::getInterleaving( idx ) ] = - reduction( sharedData[ Devices::Cuda::getInterleaving( idx ) ], chunkShift ); - output[ blockOffset + idx ] = sharedData[ Devices::Cuda::getInterleaving( idx ) ]; + sharedData[ Cuda::getInterleaving( idx ) ] = + reduction( sharedData[ Cuda::getInterleaving( idx ) ], chunkShift ); + output[ blockOffset + idx ] = sharedData[ Cuda::getInterleaving( idx ) ]; idx += blockDim.x; } __syncthreads(); @@ -147,11 +146,11 @@ cudaFirstPhaseBlockScan( const ScanType scanType, { if( scanType == ScanType::Exclusive ) { - auxArray[ blockIdx.x ] = reduction( sharedData[ Devices::Cuda::getInterleaving( 
lastElementInBlock - 1 ) ], - sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock ) ] ); + auxArray[ blockIdx.x ] = reduction( sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ], + sharedData[ Cuda::getInterleaving( lastElementInBlock ) ] ); } else - auxArray[ blockIdx.x ] = sharedData[ Devices::Cuda::getInterleaving( lastElementInBlock - 1 ) ]; + auxArray[ blockIdx.x ] = sharedData[ Cuda::getInterleaving( lastElementInBlock - 1 ) ]; } } @@ -245,11 +244,11 @@ struct CudaScanKernelLauncher // compute the number of grids const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); + const Index numberOfGrids = Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); //std::cerr << "numberOfgrids = " << numberOfGrids << std::endl; // allocate array for the block sums - Array< Real, Devices::Cuda > blockSums; + Containers::Array< Real, Devices::Cuda > blockSums; blockSums.setSize( numberOfBlocks ); // loop over all grids @@ -268,8 +267,8 @@ struct CudaScanKernelLauncher // run the kernel const std::size_t sharedDataSize = elementsInBlock + - elementsInBlock / Devices::Cuda::getNumberOfSharedMemoryBanks() + 2; - const std::size_t sharedMemory = ( sharedDataSize + blockSize + Devices::Cuda::getWarpSize() ) * sizeof( Real ); + elementsInBlock / Cuda::getNumberOfSharedMemoryBanks() + 2; + const std::size_t sharedMemory = ( sharedDataSize + blockSize + Cuda::getWarpSize() ) * sizeof( Real ); cudaFirstPhaseBlockScan<<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( scanType, reduction, @@ -330,7 +329,7 @@ struct CudaScanKernelLauncher // compute the number of grids const int elementsInBlock = 8 * blockSize; const Index numberOfBlocks = roundUpDivision( size, elementsInBlock ); - const Index numberOfGrids = Devices::Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); + const Index numberOfGrids = 
Cuda::getNumberOfGrids( numberOfBlocks, maxGridSize() ); // loop over all grids for( Index gridIdx = 0; gridIdx < numberOfGrids; gridIdx++ ) { @@ -369,13 +368,13 @@ struct CudaScanKernelLauncher */ static int& maxGridSize() { - static int maxGridSize = Devices::Cuda::getMaxGridSize(); + static int maxGridSize = Cuda::getMaxGridSize(); return maxGridSize; } static void resetMaxGridSize() { - maxGridSize() = Devices::Cuda::getMaxGridSize(); + maxGridSize() = Cuda::getMaxGridSize(); } static int& gridsCount() @@ -388,5 +387,4 @@ struct CudaScanKernelLauncher #endif } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/DistributedScan.h b/src/TNL/Algorithms/DistributedScan.h similarity index 92% rename from src/TNL/Containers/Algorithms/DistributedScan.h rename to src/TNL/Algorithms/DistributedScan.h index 44fd425b92efcd5ab047111271e9f5b8a319e080..742acd5ed923b4d0e0cbf14e37be8fb40866ec06 100644 --- a/src/TNL/Containers/Algorithms/DistributedScan.h +++ b/src/TNL/Algorithms/DistributedScan.h @@ -12,11 +12,10 @@ #pragma once -#include <TNL/Containers/Algorithms/Scan.h> +#include <TNL/Algorithms/Scan.h> #include <TNL/Containers/Vector.h> namespace TNL { -namespace Containers { namespace Algorithms { template< ScanType Type > @@ -51,11 +50,11 @@ struct DistributedScan const int nproc = CommunicatorType::GetSize( group ); RealType dataForScatter[ nproc ]; for( int i = 0; i < nproc; i++ ) dataForScatter[ i ] = localSum; - Vector< RealType, Devices::Host > rankSums( nproc ); + Containers::Vector< RealType, Devices::Host > rankSums( nproc ); // NOTE: exchanging general data types does not work with MPI CommunicatorType::Alltoall( dataForScatter, 1, rankSums.getData(), 1, group ); - // compute prefix-sum of the per-rank sums + // compute the scan of the per-rank sums Scan< Devices::Host, ScanType::Exclusive >::perform( rankSums, 0, nproc, reduction, zero ); // perform second phase: shift by the per-block and per-rank 
offsets @@ -66,5 +65,4 @@ struct DistributedScan }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h b/src/TNL/Algorithms/MemoryOperations.h similarity index 58% rename from src/TNL/Containers/Algorithms/ArrayOperations.h rename to src/TNL/Algorithms/MemoryOperations.h index ca62f5b7ea45254298cb02d0ac909ee2242e72f2..59da324028c513853fdc6da81ba21d877bb98334 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperations.h +++ b/src/TNL/Algorithms/MemoryOperations.h @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperations.h - description + MemoryOperations.h - description ------------------- begin : Jul 15, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -10,21 +10,19 @@ #pragma once +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { -namespace Containers { namespace Algorithms { -template< typename DestinationDevice, - typename SourceDevice = DestinationDevice > -struct ArrayOperations; +template< typename DestinationDevice > +struct MemoryOperations; -// TODO: establish the concept of a "void device" for static computations in the whole TNL template<> -struct ArrayOperations< void > +struct MemoryOperations< Devices::Sequential > { template< typename Element > __cuda_callable__ @@ -49,37 +47,6 @@ struct ArrayOperations< void > const SourceElement* source, const Index size ); - template< typename Element1, - typename Element2, - typename Index > - __cuda_callable__ - static bool compare( const Element1* destination, - const Element2* source, - const Index size ); -}; - -template<> -struct ArrayOperations< Devices::Host > -{ - template< typename Element > - static void setElement( Element* data, - const Element& value ); - - template< typename Element > - static Element getElement( const Element* data ); - - 
template< typename Element, typename Index > - static void set( Element* data, - const Element& value, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - template< typename DestinationElement, typename Index, typename SourceIterator > @@ -91,25 +58,28 @@ struct ArrayOperations< Devices::Host > template< typename Element1, typename Element2, typename Index > + __cuda_callable__ static bool compare( const Element1* destination, const Element2* source, const Index size ); template< typename Element, typename Index > + __cuda_callable__ static bool containsValue( const Element* data, const Index size, const Element& value ); template< typename Element, typename Index > + __cuda_callable__ static bool containsOnlyValue( const Element* data, const Index size, const Element& value ); }; template<> -struct ArrayOperations< Devices::Cuda > +struct MemoryOperations< Devices::Host > { template< typename Element > static void setElement( Element* data, @@ -159,44 +129,7 @@ struct ArrayOperations< Devices::Cuda > }; template<> -struct ArrayOperations< Devices::Cuda, Devices::Host > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - -template<> -struct ArrayOperations< Devices::Host, Devices::Cuda > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename Element1, - typename Element2, - typename Index > - 
static bool compare( const Element1* destination, - const Element2* source, - const Index size ); -}; - - -template<> -struct ArrayOperations< Devices::MIC > +struct MemoryOperations< Devices::Cuda > { template< typename Element > static void setElement( Element* data, @@ -245,49 +178,9 @@ struct ArrayOperations< Devices::MIC > const Element& value ); }; -template<> -struct ArrayOperations< Devices::MIC, Devices::Host > -{ - public: - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - -template<> -struct ArrayOperations< Devices::Host, Devices::MIC > -{ - template< typename DestinationElement, - typename SourceElement, - typename Index > - static void copy( DestinationElement* destination, - const SourceElement* source, - const Index size ); - - template< typename DestinationElement, - typename SourceElement, - typename Index > - static bool compare( const DestinationElement* destination, - const SourceElement* source, - const Index size ); -}; - } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp> -#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp> -#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp> -#include <TNL/Containers/Algorithms/ArrayOperationsMIC.hpp> +#include <TNL/Algorithms/MemoryOperationsSequential.hpp> +#include <TNL/Algorithms/MemoryOperationsHost.hpp> +#include <TNL/Algorithms/MemoryOperationsCuda.hpp> diff --git a/src/TNL/Algorithms/MemoryOperationsCuda.hpp b/src/TNL/Algorithms/MemoryOperationsCuda.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..ea4b92b61ba5d52fdc6ea98f656d25a97db02ab9 --- /dev/null +++ b/src/TNL/Algorithms/MemoryOperationsCuda.hpp @@ -0,0 +1,159 @@ +/*************************************************************************** + MemoryOperationsCuda.hpp - description + ------------------- + begin : Jul 16, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <iostream> +#include <memory> // std::unique_ptr +#include <stdexcept> + +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/Reduction.h> +#include <TNL/Exceptions/CudaSupportMissing.h> + +namespace TNL { +namespace Algorithms { + +template< typename Element > +void +MemoryOperations< Devices::Cuda >:: +setElement( Element* data, + const Element& value ) +{ + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); + MemoryOperations< Devices::Cuda >::set( data, value, 1 ); +} + +template< typename Element > +Element +MemoryOperations< Devices::Cuda >:: +getElement( const Element* data ) +{ + TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); + Element result; + MultiDeviceMemoryOperations< void, Devices::Cuda >::template copy< Element, Element, int >( &result, data, 1 ); + return result; +} + +template< typename Element, typename Index > +void +MemoryOperations< Devices::Cuda >:: +set( Element* data, + const Element& value, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." 
); + auto kernel = [data, value] __cuda_callable__ ( Index i ) + { + data[ i ] = value; + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + +template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MemoryOperations< Devices::Cuda >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + + // our ParallelFor kernel is faster than cudaMemcpy + auto kernel = [destination, source] __cuda_callable__ ( Index i ) + { + destination[ i ] = source[ i ]; + }; + ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); +} + +template< typename DestinationElement, + typename Index, + typename SourceIterator > +void +MemoryOperations< Devices::Cuda >:: +copyFromIterator( DestinationElement* destination, + Index destinationSize, + SourceIterator first, + SourceIterator last ) +{ + using BaseType = typename std::remove_cv< DestinationElement >::type; + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), destinationSize ); + std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] }; + Index copiedElements = 0; + while( copiedElements < destinationSize && first != last ) { + Index i = 0; + while( i < buffer_size && first != last ) + buffer[ i++ ] = *first++; + MultiDeviceMemoryOperations< Devices::Cuda, void >::copy( &destination[ copiedElements ], buffer.get(), i ); + copiedElements += i; + } + if( first != last ) + throw std::length_error( "Source iterator is larger than the destination array." 
); +} + +template< typename Element1, + typename Element2, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +compare( const Element1* destination, + const Element2* source, + const Index size ) +{ + if( size == 0 ) return true; + TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); +} + +template< typename Element, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +containsValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "" ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); +} + +template< typename Element, + typename Index > +bool +MemoryOperations< Devices::Cuda >:: +containsOnlyValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); + TNL_ASSERT_GE( size, 0, "" ); + + auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); +} + +} // namespace Algorithms +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp b/src/TNL/Algorithms/MemoryOperationsHost.hpp similarity index 55% rename from src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp rename to src/TNL/Algorithms/MemoryOperationsHost.hpp index 3351444141e2eb3584d2f582e1be7026fc34532e..cc85975f55700c5da73fc6bca509fffc75b0e7bb 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsHost.hpp +++ b/src/TNL/Algorithms/MemoryOperationsHost.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperationsHost.hpp - description + MemoryOperationsHost.hpp - description ------------------- begin : Jul 16, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -12,19 +12,18 @@ #include <type_traits> #include <stdexcept> -#include <string.h> +#include <algorithm> // std::copy, std::equal -#include <TNL/ParallelFor.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/Reduction.h> namespace TNL { -namespace Containers { namespace Algorithms { template< typename Element > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: setElement( Element* data, const Element& value ) { @@ -34,7 +33,7 @@ setElement( Element* data, template< typename Element > Element -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: getElement( const Element* data ) { TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." 
); @@ -43,7 +42,7 @@ getElement( const Element* data ) template< typename Element, typename Index > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: set( Element* data, const Element& value, const Index size ) @@ -61,60 +60,47 @@ template< typename DestinationElement, typename SourceElement, typename Index > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: copy( DestinationElement* destination, const SourceElement* source, const Index size ) { if( size == 0 ) return; - if( std::is_same< DestinationElement, SourceElement >::value && - ( std::is_fundamental< DestinationElement >::value || - std::is_pointer< DestinationElement >::value ) ) - { - // GCC 8.1 complains that we bypass a non-trivial copy-constructor - // (in C++17 we could use constexpr if to avoid compiling this branch in that case) - #if defined(__GNUC__) && ( __GNUC__ > 8 || ( __GNUC__ == 8 && __GNUC_MINOR__ > 0 ) ) && !defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wclass-memaccess" - #endif - memcpy( destination, source, size * sizeof( DestinationElement ) ); - #if defined(__GNUC__) && !defined(__clang__) && !defined(__NVCC__) - #pragma GCC diagnostic pop - #endif - } - else - { + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); + + // our ParallelFor version is faster than std::copy iff we use more than 1 thread + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { auto kernel = [destination, source]( Index i ) { destination[ i ] = source[ i ]; }; ParallelFor< Devices::Host >::exec( (Index) 0, size, kernel ); } + else { + // std::copy usually uses std::memcpy for TriviallyCopyable types + std::copy( source, source + size, destination ); + } } template< typename DestinationElement, typename Index, typename SourceIterator > void -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: copyFromIterator( DestinationElement* destination, Index destinationSize, SourceIterator first, SourceIterator last ) { - Index i = 0; - while( i < destinationSize && first != last ) - destination[ i++ ] = *first++; - if( first != last ) - throw std::length_error( "Source iterator is larger than the destination array." ); + MemoryOperations< Devices::Sequential >::copyFromIterator( destination, destinationSize, first, last ); } - template< typename DestinationElement, typename SourceElement, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: compare( const DestinationElement* destination, const SourceElement* source, const Index size ) @@ -122,24 +108,21 @@ compare( const DestinationElement* destination, if( size == 0 ) return true; TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." 
); - if( std::is_same< DestinationElement, SourceElement >::value && - ( std::is_fundamental< DestinationElement >::value || - std::is_pointer< DestinationElement >::value ) ) - { - if( memcmp( destination, source, size * sizeof( DestinationElement ) ) != 0 ) - return false; + + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [destination, source] ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; + return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true ); + } + else { + // sequential algorithm can return as soon as it finds a mismatch + return std::equal( source, source + size, destination ); } - else - for( Index i = 0; i < size; i++ ) - if( ! ( destination[ i ] == source[ i ] ) ) - return false; - return true; } template< typename Element, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: containsValue( const Element* data, const Index size, const Element& value ) @@ -148,16 +131,20 @@ containsValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); TNL_ASSERT_GE( size, 0, "" ); - for( Index i = 0; i < size; i++ ) - if( data[ i ] == value ) - return true; - return false; + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [=] ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Host >::reduce( size, std::logical_or<>{}, fetch, false ); + } + else { + // sequential algorithm can return as soon as it finds a match + return MemoryOperations< Devices::Sequential >::containsValue( data, size, value ); + } } template< typename Element, typename Index > bool -ArrayOperations< Devices::Host >:: +MemoryOperations< Devices::Host >:: containsOnlyValue( const Element* data, const Index size, const Element& value ) @@ -166,12 +153,15 @@ containsOnlyValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); - for( Index i = 0; i < size; i++ ) - if( ! ( data[ i ] == value ) ) - return false; - return true; + if( Devices::Host::isOMPEnabled() && Devices::Host::getMaxThreadsCount() > 1 ) { + auto fetch = [data, value] ( Index i ) -> bool { return data[ i ] == value; }; + return Reduction< Devices::Host >::reduce( size, std::logical_and<>{}, fetch, true ); + } + else { + // sequential algorithm can return as soon as it finds a mismatch + return MemoryOperations< Devices::Sequential >::containsOnlyValue( data, size, value ); + } } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Algorithms/MemoryOperationsSequential.hpp b/src/TNL/Algorithms/MemoryOperationsSequential.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9e5ad25b1392ccd093952b0dbb25b941370eb833 --- /dev/null +++ b/src/TNL/Algorithms/MemoryOperationsSequential.hpp @@ -0,0 +1,135 @@ +/*************************************************************************** + MemoryOperationsSequential.hpp - description + ------------------- + begin : Apr 8, 2019 + 
copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Algorithms/MemoryOperations.h> + +namespace TNL { +namespace Algorithms { + +template< typename Element > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +setElement( Element* data, + const Element& value ) +{ + *data = value; +} + +template< typename Element > +__cuda_callable__ +Element +MemoryOperations< Devices::Sequential >:: +getElement( const Element* data ) +{ + return *data; +} + +template< typename Element, typename Index > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +set( Element* data, + const Element& value, + const Index size ) +{ + for( Index i = 0; i < size; i ++ ) + data[ i ] = value; +} + +template< typename DestinationElement, + typename SourceElement, + typename Index > +__cuda_callable__ +void +MemoryOperations< Devices::Sequential >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + for( Index i = 0; i < size; i ++ ) + destination[ i ] = source[ i ]; +} + +template< typename DestinationElement, + typename Index, + typename SourceIterator > +void +MemoryOperations< Devices::Sequential >:: +copyFromIterator( DestinationElement* destination, + Index destinationSize, + SourceIterator first, + SourceIterator last ) +{ + Index i = 0; + while( i < destinationSize && first != last ) + destination[ i++ ] = *first++; + if( first != last ) + throw std::length_error( "Source iterator is larger than the destination array." ); +} + +template< typename Element1, + typename Element2, + typename Index > +__cuda_callable__ +bool +MemoryOperations< Devices::Sequential >:: +compare( const Element1* destination, + const Element2* source, + const Index size ) +{ + for( Index i = 0; i < size; i++ ) + if( ! 
( destination[ i ] == source[ i ] ) ) + return false; + return true; +} + +template< typename Element, + typename Index > +__cuda_callable__ +bool +MemoryOperations< Devices::Sequential >:: +containsValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, 0, "" ); + + for( Index i = 0; i < size; i++ ) + if( data[ i ] == value ) + return true; + return false; +} + +template< typename Element, + typename Index > +__cuda_callable__ +bool +MemoryOperations< Devices::Sequential >:: +containsOnlyValue( const Element* data, + const Index size, + const Element& value ) +{ + if( size == 0 ) return false; + TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); + TNL_ASSERT_GE( size, 0, "" ); + + for( Index i = 0; i < size; i++ ) + if( ! ( data[ i ] == value ) ) + return false; + return true; +} + +} // namespace Algorithms +} // namespace TNL diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h new file mode 100644 index 0000000000000000000000000000000000000000..48e5ad64750c5dc8b7a84a9b4346b345e6ff3f1a --- /dev/null +++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h @@ -0,0 +1,278 @@ +/*************************************************************************** + MultiDeviceMemoryOperations.h - description + ------------------- + begin : Aug 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Algorithms/MemoryOperations.h> + +namespace TNL { +namespace Algorithms { + +template< typename DestinationDevice, + typename SourceDevice = DestinationDevice > +struct MultiDeviceMemoryOperations +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + // use DestinationDevice, unless it is void + using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >; + MemoryOperations< Device >::copy( destination, source, size ); + } + + template< typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + // use DestinationDevice, unless it is void + using Device = std::conditional_t< std::is_void< DestinationDevice >::value, SourceDevice, DestinationDevice >; + return MemoryOperations< Device >::compare( destination, source, size ); + } +}; + + +template< typename DeviceType > +struct MultiDeviceMemoryOperations< Devices::Cuda, DeviceType > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ); + + template< typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ); +}; + +template< typename DeviceType > +struct MultiDeviceMemoryOperations< DeviceType, Devices::Cuda > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( 
DestinationElement* destination, + const SourceElement* source, + const Index size ); + + template< typename Element1, + typename Element2, + typename Index > + static bool compare( const Element1* destination, + const Element2* source, + const Index size ); +}; + + +// CUDA <-> CUDA to disambiguate from partial specializations below +template<> +struct MultiDeviceMemoryOperations< Devices::Cuda, Devices::Cuda > +{ + template< typename DestinationElement, + typename SourceElement, + typename Index > + static void copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + MemoryOperations< Devices::Cuda >::copy( destination, source, size ); + } + + template< typename DestinationElement, + typename SourceElement, + typename Index > + static bool compare( const DestinationElement* destination, + const SourceElement* source, + const Index size ) + { + return MemoryOperations< Devices::Cuda >::compare( destination, source, size ); + } +}; + + +/**** + * Operations CUDA -> Host + */ +template< typename DeviceType > + template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); +#ifdef HAVE_CUDA + if( std::is_same< DestinationElement, SourceElement >::value ) + { + if( cudaMemcpy( destination, + source, + size * sizeof( DestinationElement ), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." 
<< std::endl; + TNL_CHECK_CUDA_DEVICE; + } + else + { + using BaseType = typename std::remove_cv< SourceElement >::type; + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size ); + std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] }; + Index i = 0; + while( i < size ) + { + if( cudaMemcpy( (void*) buffer.get(), + (void*) &source[ i ], + TNL::min( size - i, buffer_size ) * sizeof(SourceElement), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + int j = 0; + while( j < buffer_size && i + j < size ) + { + destination[ i + j ] = buffer[ j ]; + j++; + } + i += j; + } + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + + +template< typename DeviceType > + template< typename Element1, + typename Element2, + typename Index > +bool +MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >:: +compare( const Element1* destination, + const Element2* source, + const Index size ) +{ + if( size == 0 ) return true; + /*** + * Here, destination is on host and source is on CUDA device. + */ + TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); +#ifdef HAVE_CUDA + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(Element2), size ); + std::unique_ptr< Element2[] > host_buffer{ new Element2[ buffer_size ] }; + Index compared = 0; + while( compared < size ) + { + const int transfer = TNL::min( size - compared, buffer_size ); + if( cudaMemcpy( (void*) host_buffer.get(), + (void*) &source[ compared ], + transfer * sizeof(Element2), + cudaMemcpyDeviceToHost ) != cudaSuccess ) + std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + if( ! 
MemoryOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) ) + return false; + compared += transfer; + } + return true; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +/**** + * Operations Host -> CUDA + */ +template< typename DeviceType > + template< typename DestinationElement, + typename SourceElement, + typename Index > +void +MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >:: +copy( DestinationElement* destination, + const SourceElement* source, + const Index size ) +{ + if( size == 0 ) return; + TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); + TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); +#ifdef HAVE_CUDA + if( std::is_same< DestinationElement, SourceElement >::value ) + { + if( cudaMemcpy( destination, + source, + size * sizeof( DestinationElement ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; + TNL_CHECK_CUDA_DEVICE; + } + else + { + const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(DestinationElement), size ); + std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ buffer_size ] }; + Index i = 0; + while( i < size ) + { + int j = 0; + while( j < buffer_size && i + j < size ) + { + buffer[ j ] = source[ i + j ]; + j++; + } + if( cudaMemcpy( (void*) &destination[ i ], + (void*) buffer.get(), + j * sizeof( DestinationElement ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + std::cerr << "Transfer of data from host to CUDA device failed." 
<< std::endl; + TNL_CHECK_CUDA_DEVICE; + i += j; + } + } +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename DeviceType > + template< typename Element1, + typename Element2, + typename Index > +bool +MultiDeviceMemoryOperations< Devices::Cuda, DeviceType >:: +compare( const Element1* hostData, + const Element2* deviceData, + const Index size ) +{ + if( size == 0 ) return true; + TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." ); + TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); + return MultiDeviceMemoryOperations< DeviceType, Devices::Cuda >::compare( deviceData, hostData, size ); +} + +} // namespace Algorithms +} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Multireduction.h b/src/TNL/Algorithms/Multireduction.h similarity index 73% rename from src/TNL/Containers/Algorithms/Multireduction.h rename to src/TNL/Algorithms/Multireduction.h index 9802a295356db6e53c7f9c3d809c3dc9c70b38c8..8e63fa7eabce3e8d9d837770794d991fd12705e7 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.h +++ b/src/TNL/Algorithms/Multireduction.h @@ -14,16 +14,45 @@ #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> namespace TNL { -namespace Containers { namespace Algorithms { template< typename Device > struct Multireduction; +template<> +struct Multireduction< Devices::Sequential > +{ + /** + * Parameters: + * zero: starting value for reduction + * dataFetcher: callable object such that `dataFetcher( i, j )` yields + * the i-th value to be reduced from the j-th dataset + * (i = 0,...,size-1; j = 0,...,n-1) + * reduction: callable object representing the reduction operation + * for example, it can be an instance of std::plus, std::logical_and, + * std::logical_or etc. 
+ * size: the size of each dataset + * n: number of datasets to be reduced + * result: output array of size = n + */ + template< typename Result, + typename DataFetcher, + typename Reduction, + typename Index > + static constexpr void + reduce( const Result zero, + DataFetcher dataFetcher, + const Reduction reduction, + const Index size, + const int n, + Result* result ); +}; + template<> struct Multireduction< Devices::Host > { @@ -83,7 +112,6 @@ struct Multireduction< Devices::Cuda > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL #include "Multireduction.hpp" diff --git a/src/TNL/Containers/Algorithms/Multireduction.hpp b/src/TNL/Algorithms/Multireduction.hpp similarity index 65% rename from src/TNL/Containers/Algorithms/Multireduction.hpp rename to src/TNL/Algorithms/Multireduction.hpp index 8c74ee9ac479a12c30c0e2b49df787c5bd2c277d..0bfead2871a5d216522845680c02912cdcd1d8b6 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.hpp +++ b/src/TNL/Algorithms/Multireduction.hpp @@ -17,9 +17,9 @@ //#define CUDA_REDUCTION_PROFILING #include <TNL/Assert.h> -#include <TNL/Containers/Algorithms/Multireduction.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/CudaMultireductionKernel.h> +#include <TNL/Algorithms/Multireduction.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> +#include <TNL/Algorithms/CudaMultireductionKernel.h> #ifdef CUDA_REDUCTION_PROFILING #include <TNL/Timer.h> @@ -27,9 +27,85 @@ #endif namespace TNL { -namespace Containers { namespace Algorithms { +template< typename Result, + typename DataFetcher, + typename Reduction, + typename Index > +void constexpr +Multireduction< Devices::Sequential >:: +reduce( const Result zero, + DataFetcher dataFetcher, + const Reduction reduction, + const Index size, + const int n, + Result* result ) +{ + TNL_ASSERT_GT( size, 0, "The size of datasets must be positive." 
); + TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." ); + + constexpr int block_size = 128; + const int blocks = size / block_size; + + if( blocks > 1 ) { + // initialize array for unrolled results + // (it is accessed as a row-major matrix with n rows and 4 columns) + Result r[ n * 4 ]; + for( int k = 0; k < n * 4; k++ ) + r[ k ] = zero; + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + for( int i = 0; i < block_size; i += 4 ) { + _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i, k ) ); + _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) ); + _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) ); + _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) ); + } + } + } + + // reduction of the last, incomplete block (not unrolled) + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + for( Index i = blocks * block_size; i < size; i++ ) + _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) ); + } + + // reduction of unrolled results + for( int k = 0; k < n; k++ ) { + Result* _r = r + 4 * k; + _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] ); + _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] ); + _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] ); + + // copy the result into the output parameter + result[ k ] = _r[ 0 ]; + } + } + else { + for( int k = 0; k < n; k++ ) + result[ k ] = zero; + + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int k = 0; k < n; k++ ) { + for( int i = 0; i < block_size; i++ ) + result[ k ] = reduction( result[ k ], dataFetcher( offset + i, k ) ); + } + } + + for( int k = 0; k < n; k++ ) { + for( Index i = blocks * block_size; i < size; i++ ) + result[ k ] = reduction( result[ k ], dataFetcher( i, k ) ); + } + } +} + template< typename Result, typename DataFetcher, typename Reduction, @@ -46,10 +122,10 @@ reduce( const Result zero, TNL_ASSERT_GT( 
size, 0, "The size of datasets must be positive." ); TNL_ASSERT_GT( n, 0, "The number of datasets must be positive." ); +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { const int threads = TNL::min( blocks, Devices::Host::getMaxThreadsCount() ); #pragma omp parallel num_threads(threads) @@ -107,67 +183,9 @@ reduce( const Result zero, } } } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - // (it is accessed as a row-major matrix with n rows and 4 columns) - Result r[ n * 4 ]; - for( int k = 0; k < n * 4; k++ ) - r[ k ] = zero; - - // main reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - for( int i = 0; i < block_size; i += 4 ) { - _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( offset + i, k ) ); - _r[ 1 ] = reduction( _r[ 1 ], dataFetcher( offset + i + 1, k ) ); - _r[ 2 ] = reduction( _r[ 2 ], dataFetcher( offset + i + 2, k ) ); - _r[ 3 ] = reduction( _r[ 3 ], dataFetcher( offset + i + 3, k ) ); - } - } - } - - // reduction of the last, incomplete block (not unrolled) - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - for( Index i = blocks * block_size; i < size; i++ ) - _r[ 0 ] = reduction( _r[ 0 ], dataFetcher( i, k ) ); - } - - // reduction of unrolled results - for( int k = 0; k < n; k++ ) { - Result* _r = r + 4 * k; - _r[ 0 ] = reduction( _r[ 0 ], _r[ 1 ] ); - _r[ 0 ] = reduction( _r[ 0 ], _r[ 2 ] ); - _r[ 0 ] = reduction( _r[ 0 ], _r[ 3 ] ); - - // copy the result into the output parameter - result[ k ] = _r[ 0 ]; - } - } - else { - for( int k = 0; k < n; k++ ) - result[ k ] = zero; - - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int k = 0; k < n; k++ ) { - for( int i = 0; i < block_size; i++ ) - result[ k ] = reduction( result[ k ], dataFetcher( 
offset + i, k ) ); - } - } - - for( int k = 0; k < n; k++ ) { - for( Index i = blocks * block_size; i < size; i++ ) - result[ k ] = reduction( result[ k ], dataFetcher( i, k ) ); - } - } -#ifdef HAVE_OPENMP - } + else #endif + Multireduction< Devices::Sequential >::reduce( zero, dataFetcher, reduction, size, n, result ); } template< typename Result, @@ -205,7 +223,7 @@ reduce( const Result zero, // transfer the reduced data from device to host std::unique_ptr< Result[] > resultArray{ new Result[ n * reducedSize ] }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, n * reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -216,7 +234,7 @@ reduce( const Result zero, // finish the reduction on the host auto dataFetcherFinish = [&] ( int i, int k ) { return resultArray[ i + k * reducedSize ]; }; - Multireduction< Devices::Host >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult ); + Multireduction< Devices::Sequential >::reduce( zero, dataFetcherFinish, reduction, reducedSize, n, hostResult ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -225,5 +243,4 @@ reduce( const Result zero, }; } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/ParallelFor.h b/src/TNL/Algorithms/ParallelFor.h similarity index 79% rename from src/TNL/ParallelFor.h rename to src/TNL/Algorithms/ParallelFor.h index 04af2740807b9139ca0b8452b9c1b7bc52f5a8c8..6d5e917ba4ac07246322a82c7d5edec38a1cb02b 100644 --- a/src/TNL/ParallelFor.h +++ b/src/TNL/Algorithms/ParallelFor.h @@ -10,9 +10,13 @@ #pragma once +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/CudaDeviceInfo.h> +#include <TNL/Cuda/CheckDevice.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include 
<TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Math.h> /**** @@ -27,12 +31,57 @@ */ namespace TNL { +namespace Algorithms { enum ParallelForMode { SynchronousMode, AsynchronousMode }; - -template< typename Device = Devices::Host, + +template< typename Device = Devices::Sequential, ParallelForMode Mode = SynchronousMode > struct ParallelFor +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index start, Index end, Function f, FunctionArgs... args ) + { + for( Index i = start; i < end; i++ ) + f( i, args... ); + } +}; + +template< typename Device = Devices::Sequential, + ParallelForMode Mode = SynchronousMode > +struct ParallelFor2D +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index startX, Index startY, Index endX, Index endY, Function f, FunctionArgs... args ) + { + for( Index j = startY; j < endY; j++ ) + for( Index i = startX; i < endX; i++ ) + f( i, j, args... ); + } +}; + +template< typename Device = Devices::Sequential, + ParallelForMode Mode = SynchronousMode > +struct ParallelFor3D +{ + template< typename Index, + typename Function, + typename... FunctionArgs > + static void exec( Index startX, Index startY, Index startZ, Index endX, Index endY, Index endZ, Function f, FunctionArgs... args ) + { + for( Index k = startZ; k < endZ; k++ ) + for( Index j = startY; j < endY; j++ ) + for( Index i = startX; i < endX; i++ ) + f( i, j, k, args... 
); + } +}; + +template< ParallelForMode Mode > +struct ParallelFor< Devices::Host, Mode > { template< typename Index, typename Function, @@ -41,26 +90,23 @@ struct ParallelFor { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 )' - if( TNL::Devices::Host::isOMPEnabled() && end - start > 512 ) + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() && end - start > 512 )' + if( Devices::Host::isOMPEnabled() && end - start > 512 ) { -#pragma omp parallel for + #pragma omp parallel for for( Index i = start; i < end; i++ ) f( i, args... ); } else - for( Index i = start; i < end; i++ ) - f( i, args... ); + ParallelFor< Devices::Sequential >::exec( start, end, f, args... ); #else - for( Index i = start; i < end; i++ ) - f( i, args... ); + ParallelFor< Devices::Sequential >::exec( start, end, f, args... ); #endif } }; -template< typename Device = Devices::Host, - ParallelForMode Mode = SynchronousMode > -struct ParallelFor2D +template< ParallelForMode Mode > +struct ParallelFor2D< Devices::Host, Mode > { template< typename Index, typename Function, @@ -69,30 +115,24 @@ struct ParallelFor2D { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )' - if( TNL::Devices::Host::isOMPEnabled() ) + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )' + if( Devices::Host::isOMPEnabled() ) { -#pragma omp parallel for - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, args... ); - } - else { + #pragma omp parallel for for( Index j = startY; j < endY; j++ ) for( Index i = startX; i < endX; i++ ) f( i, j, args... ); } + else + ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... 
); #else - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, args... ); + ParallelFor2D< Devices::Sequential >::exec( startX, startY, endX, endY, f, args... ); #endif } }; -template< typename Device = Devices::Host, - ParallelForMode Mode = SynchronousMode > -struct ParallelFor3D +template< ParallelForMode Mode > +struct ParallelFor3D< Devices::Host, Mode > { template< typename Index, typename Function, @@ -101,27 +141,19 @@ struct ParallelFor3D { #ifdef HAVE_OPENMP // Benchmarks show that this is significantly faster compared - // to '#pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() )' - if( TNL::Devices::Host::isOMPEnabled() ) - { -#pragma omp parallel for collapse(2) - for( Index k = startZ; k < endZ; k++ ) - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, k, args... ); - } - else + // to '#pragma omp parallel for if( Devices::Host::isOMPEnabled() )' + if( Devices::Host::isOMPEnabled() ) { + #pragma omp parallel for collapse(2) for( Index k = startZ; k < endZ; k++ ) for( Index j = startY; j < endY; j++ ) for( Index i = startX; i < endX; i++ ) f( i, j, k, args... ); } + else + ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... ); #else - for( Index k = startZ; k < endZ; k++ ) - for( Index j = startY; j < endY; j++ ) - for( Index i = startX; i < endX; i++ ) - f( i, j, k, args... ); + ParallelFor3D< Devices::Sequential >::exec( startX, startY, startZ, endX, endY, endZ, f, args... 
); #endif } }; @@ -203,14 +235,14 @@ struct ParallelFor< Devices::Cuda, Mode > if( end > start ) { dim3 blockSize( 256 ); dim3 gridSize; - gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); if( (std::size_t) blockSize.x * gridSize.x >= (std::size_t) end - start ) ParallelForKernel< false ><<< gridSize, blockSize >>>( start, end, f, args... ); else { // decrease the grid size and align to the number of multiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = TNL::min( desGridSize, Devices::Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = TNL::min( desGridSize, Cuda::getNumberOfBlocks( end - start, blockSize.x ) ); ParallelForKernel< true ><<< gridSize, blockSize >>>( start, end, f, args... 
); } @@ -253,8 +285,8 @@ struct ParallelFor2D< Devices::Cuda, Mode > blockSize.y = TNL::min( 8, sizeY ); } dim3 gridSize; - gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); - gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); + gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); dim3 gridCount; gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x ); @@ -337,9 +369,9 @@ struct ParallelFor3D< Devices::Cuda, Mode > blockSize.z = TNL::min( 4, sizeZ ); } dim3 gridSize; - gridSize.x = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); - gridSize.y = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); - gridSize.z = TNL::min( Devices::Cuda::getMaxGridSize(), Devices::Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) ); + gridSize.x = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeX, blockSize.x ) ); + gridSize.y = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeY, blockSize.y ) ); + gridSize.z = TNL::min( Cuda::getMaxGridSize(), Cuda::getNumberOfBlocks( sizeZ, blockSize.z ) ); dim3 gridCount; gridCount.x = roundUpDivision( sizeX, blockSize.x * gridSize.x ); @@ -383,4 +415,5 @@ struct ParallelFor3D< Devices::Cuda, Mode > } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Algorithms/Reduction.h similarity index 91% rename from src/TNL/Containers/Algorithms/Reduction.h rename to src/TNL/Algorithms/Reduction.h index 83cedb01fcb5a93f4b225c92cc76f9500f72082d..c0d62684d57ccc0fa225f8ec0d56f92e8b0e904d 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Algorithms/Reduction.h @@ -15,11 +15,11 @@ 
#include <utility> // std::pair #include <functional> // reduction functions like std::plus, std::logical_and, std::logical_or etc. +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> namespace TNL { -namespace Containers { namespace Algorithms { /** @@ -37,6 +37,30 @@ namespace Algorithms { template< typename Device > struct Reduction; +template<> +struct Reduction< Devices::Sequential > +{ + template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > + static constexpr Result + reduce( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ); + + template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > + static constexpr std::pair< Index, Result > + reduceWithArgument( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ); +}; + template<> struct Reduction< Devices::Host > { @@ -236,7 +260,6 @@ struct Reduction< Devices::Cuda > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include <TNL/Containers/Algorithms/Reduction.hpp> +#include <TNL/Algorithms/Reduction.hpp> diff --git a/src/TNL/Containers/Algorithms/Reduction.hpp b/src/TNL/Algorithms/Reduction.hpp similarity index 73% rename from src/TNL/Containers/Algorithms/Reduction.hpp rename to src/TNL/Algorithms/Reduction.hpp index 229af13797f82a4f27f67bc81a5bfb6886a65604..b07f04445e7481679daa8e119f87adce16fe37df 100644 --- a/src/TNL/Containers/Algorithms/Reduction.hpp +++ b/src/TNL/Algorithms/Reduction.hpp @@ -16,9 +16,9 @@ //#define CUDA_REDUCTION_PROFILING -#include <TNL/Containers/Algorithms/Reduction.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/CudaReductionKernel.h> +#include <TNL/Algorithms/Reduction.h> +#include <TNL/Algorithms/CudaReductionKernel.h> +#include 
<TNL/Algorithms/MultiDeviceMemoryOperations.h> #ifdef CUDA_REDUCTION_PROFILING #include <iostream> @@ -26,7 +26,6 @@ #endif namespace TNL { -namespace Containers { namespace Algorithms { /**** @@ -36,8 +35,115 @@ namespace Algorithms { */ static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256; -//// -// Reduction on host +template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > +constexpr Result +Reduction< Devices::Sequential >:: +reduce( const Index size, + const ReductionOperation& reduction, + DataFetcher& dataFetcher, + const Result& zero ) +{ + constexpr int block_size = 128; + const int blocks = size / block_size; + + if( blocks > 1 ) { + // initialize array for unrolled results + Result r[ 4 ] = { zero, zero, zero, zero }; + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); + r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); + r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); + r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // reduction of the last, incomplete block (not unrolled) + for( Index i = blocks * block_size; i < size; i++ ) + r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); + + // reduction of unrolled results + r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); + r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); + r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); + return r[ 0 ]; + } + else { + Result result = zero; + for( Index i = 0; i < size; i++ ) + result = reduction( result, dataFetcher( i ) ); + return result; + } +} + +template< typename Index, + typename Result, + typename ReductionOperation, + typename DataFetcher > +constexpr std::pair< Index, Result > +Reduction< Devices::Sequential >:: +reduceWithArgument( const Index size, + const ReductionOperation& reduction, + DataFetcher& 
dataFetcher, + const Result& zero ) +{ + constexpr int block_size = 128; + const int blocks = size / block_size; + + if( blocks > 1 ) { + // initialize array for unrolled results + Index arg[ 4 ] = { 0, 0, 0, 0 }; + Result r[ 4 ] = { zero, zero, zero, zero }; + bool initialized( false ); + + // main reduction (explicitly unrolled loop) + for( int b = 0; b < blocks; b++ ) { + const Index offset = b * block_size; + for( int i = 0; i < block_size; i += 4 ) { + if( ! initialized ) + { + arg[ 0 ] = offset + i; + arg[ 1 ] = offset + i + 1; + arg[ 2 ] = offset + i + 2; + arg[ 3 ] = offset + i + 3; + r[ 0 ] = dataFetcher( offset + i ); + r[ 1 ] = dataFetcher( offset + i + 1 ); + r[ 2 ] = dataFetcher( offset + i + 2 ); + r[ 3 ] = dataFetcher( offset + i + 3 ); + initialized = true; + continue; + } + reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); + reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); + reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); + reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); + } + } + + // reduction of the last, incomplete block (not unrolled) + for( Index i = blocks * block_size; i < size; i++ ) + reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); + + // reduction of unrolled results + reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); + reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); + reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); + return std::make_pair( arg[ 0 ], r[ 0 ] ); + } + else { + std::pair< Index, Result > result( 0, dataFetcher( 0 ) ); + for( Index i = 1; i < size; i++ ) + reduction( result.first, i, result.second, dataFetcher( i ) ); + return result; + } +} + template< typename Index, typename Result, typename ReductionOperation, @@ -49,10 +155,10 @@ reduce( const Index size, DataFetcher& dataFetcher, const Result& zero ) { +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef 
HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable Result result = zero; @@ -93,42 +199,9 @@ reduce( const Index size, } return result; } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - Result r[ 4 ] = { zero, zero, zero, zero }; - - // main reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int i = 0; i < block_size; i += 4 ) { - r[ 0 ] = reduction( r[ 0 ], dataFetcher( offset + i ) ); - r[ 1 ] = reduction( r[ 1 ], dataFetcher( offset + i + 1 ) ); - r[ 2 ] = reduction( r[ 2 ], dataFetcher( offset + i + 2 ) ); - r[ 3 ] = reduction( r[ 3 ], dataFetcher( offset + i + 3 ) ); - } - } - - // reduction of the last, incomplete block (not unrolled) - for( Index i = blocks * block_size; i < size; i++ ) - r[ 0 ] = reduction( r[ 0 ], dataFetcher( i ) ); - - // reduction of unrolled results - r[ 0 ] = reduction( r[ 0 ], r[ 2 ] ); - r[ 1 ] = reduction( r[ 1 ], r[ 3 ] ); - r[ 0 ] = reduction( r[ 0 ], r[ 1 ] ); - return r[ 0 ]; - } - else { - Result result = zero; - for( Index i = 0; i < size; i++ ) - result = reduction( result, dataFetcher( i ) ); - return result; - } -#ifdef HAVE_OPENMP - } + else #endif + return Reduction< Devices::Sequential >::reduce( size, reduction, dataFetcher, zero ); } template< typename Index, @@ -142,10 +215,10 @@ reduceWithArgument( const Index size, DataFetcher& dataFetcher, const Result& zero ) { +#ifdef HAVE_OPENMP constexpr int block_size = 128; const int blocks = size / block_size; -#ifdef HAVE_OPENMP if( Devices::Host::isOMPEnabled() && blocks >= 2 ) { // global result variable std::pair< Index, Result > result( -1, zero ); @@ -202,57 +275,9 @@ reduceWithArgument( const Index size, } return result; } - else { -#endif - if( blocks > 1 ) { - // initialize array for unrolled results - Index arg[ 4 ] = { 0, 0, 0, 0 }; - Result r[ 4 ] = { zero, zero, zero, zero }; - bool initialized( false ); - - // main 
reduction (explicitly unrolled loop) - for( int b = 0; b < blocks; b++ ) { - const Index offset = b * block_size; - for( int i = 0; i < block_size; i += 4 ) { - if( ! initialized ) - { - arg[ 0 ] = offset + i; - arg[ 1 ] = offset + i + 1; - arg[ 2 ] = offset + i + 2; - arg[ 3 ] = offset + i + 3; - r[ 0 ] = dataFetcher( offset + i ); - r[ 1 ] = dataFetcher( offset + i + 1 ); - r[ 2 ] = dataFetcher( offset + i + 2 ); - r[ 3 ] = dataFetcher( offset + i + 3 ); - initialized = true; - continue; - } - reduction( arg[ 0 ], offset + i, r[ 0 ], dataFetcher( offset + i ) ); - reduction( arg[ 1 ], offset + i + 1, r[ 1 ], dataFetcher( offset + i + 1 ) ); - reduction( arg[ 2 ], offset + i + 2, r[ 2 ], dataFetcher( offset + i + 2 ) ); - reduction( arg[ 3 ], offset + i + 3, r[ 3 ], dataFetcher( offset + i + 3 ) ); - } - } - - // reduction of the last, incomplete block (not unrolled) - for( Index i = blocks * block_size; i < size; i++ ) - reduction( arg[ 0 ], i, r[ 0 ], dataFetcher( i ) ); - - // reduction of unrolled results - reduction( arg[ 0 ], arg[ 2 ], r[ 0 ], r[ 2 ] ); - reduction( arg[ 1 ], arg[ 3 ], r[ 1 ], r[ 3 ] ); - reduction( arg[ 0 ], arg[ 1 ], r[ 0 ], r[ 1 ] ); - return std::make_pair( arg[ 0 ], r[ 0 ] ); - } - else { - std::pair< Index, Result > result( 0, dataFetcher( 0 ) ); - for( Index i = 1; i < size; i++ ) - reduction( result.first, i, result.second, dataFetcher( i ) ); - return result; - } -#ifdef HAVE_OPENMP - } + else #endif + return Reduction< Devices::Sequential >::reduceWithArgument( size, reduction, dataFetcher, zero ); } template< typename Index, @@ -310,7 +335,7 @@ reduce( const Index size, new Result[ reducedSize ] #endif }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -321,7 +346,7 @@ reduce( const Index size, // finish the reduction on the 
host auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; - const Result result = Reduction< Devices::Host >::reduce( reducedSize, reduction, fetch, zero ); + const Result result = Reduction< Devices::Sequential >::reduce( reducedSize, reduction, fetch, zero ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -415,8 +440,8 @@ reduceWithArgument( const Index size, new Index[ reducedSize ] #endif }; - ArrayOperations< Devices::Host, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( resultArray.get(), deviceAux1, reducedSize ); + MultiDeviceMemoryOperations< void, Devices::Cuda >::copy( indexArray.get(), deviceIndexes, reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -427,7 +452,7 @@ reduceWithArgument( const Index size, // finish the reduction on the host // auto fetch = [&] ( Index i ) { return resultArray[ i ]; }; -// const Result result = Reduction< Devices::Host >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); +// const Result result = Reduction< Devices::Sequential >::reduceWithArgument( reducedSize, argument, reduction, fetch, zero ); for( Index i = 1; i < reducedSize; i++ ) reduction( indexArray[ 0 ], indexArray[ i ], resultArray[ 0 ], resultArray[ i ] ); @@ -453,5 +478,4 @@ reduceWithArgument( const Index size, } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Scan.h b/src/TNL/Algorithms/Scan.h similarity index 71% rename from src/TNL/Containers/Algorithms/Scan.h rename to src/TNL/Algorithms/Scan.h index 5587c627307da329db40eef54b41ba3c2d4e994a..81a5d2f7e753b64391e134e93a0c5bb652e54310 100644 --- a/src/TNL/Containers/Algorithms/Scan.h +++ b/src/TNL/Algorithms/Scan.h @@ -12,17 +12,17 @@ #pragma once +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> 
#include <TNL/Devices/Cuda.h> namespace TNL { -namespace Containers { namespace Algorithms { /** * \brief Scan (or prefix sum) type - inclusive or exclusive. - * - * See \ref TNL::Containers::Algorithms::Scan + * + * See \ref TNL::Algorithms::Scan. */ enum class ScanType { Exclusive, @@ -31,22 +31,22 @@ enum class ScanType { /** * \brief Computes scan (or prefix sum) on a vector. - * - * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence + * + * [Scan (or prefix sum)](https://en.wikipedia.org/wiki/Prefix_sum) operation turns a sequence * \f$a_1, \ldots, a_n\f$ into a sequence \f$s_1, \ldots, s_n\f$ defined as - * + * * \f[ * s_i = \sum_{j=1}^i a_i. * \f] * Exclusive scan (or prefix sum) is defined as - * + * * \f[ * \sigma_i = \sum_{j=1}^{i-1} a_i. * \f] - * + * * \tparam Device parameter says on what device the reduction is gonna be performed. * \tparam Type parameter says if inclusive or exclusive is scan is to be computed. - * + * * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >. */ template< typename Device, @@ -55,41 +55,41 @@ struct Scan; /** * \brief Computes segmented scan (or prefix sum) on a vector. - * + * * Segmented scan is a modification of common scan. In this case the sequence of * numbers in hand is divided into segments like this, for example - * + * * ``` * [1,3,5][2,4,6,9][3,5],[3,6,9,12,15] * ``` - * + * * and we want to compute inclusive or exclusive scan of each segment. For inclusive segmented prefix sum we get - * + * * ``` * [1,4,9][2,6,12,21][3,8][3,9,18,30,45] * ``` - * + * * and for exclusive segmented prefix sum it is - * + * * ``` * [0,1,4][0,2,6,12][0,3][0,3,9,18,30] * ``` - * + * * In addition to common scan, we need to encode the segments of the input sequence. * It is done by auxiliary flags array (it can be array of booleans) having `1` at the * beginning of each segment and `0` on all other positions. 
In our example, it would be like this: - * + * * ``` * [1,0,0,1,0,0,0,1,0,1,0,0, 0, 0] * [1,3,5,2,4,6,9,3,5,3,6,9,12,15] - * + * * ``` - * + * * \tparam Device parameter says on what device the reduction is gonna be performed. * \tparam Type parameter says if inclusive or exclusive is scan is to be computed. - * + * * See \ref Scan< Devices::Host, Type > and \ref Scan< Devices::Cuda, Type >. - * + * * **Note: Segmented scan is not implemented for CUDA yet.** */ template< typename Device, @@ -97,15 +97,75 @@ template< typename Device, struct SegmentedScan; +template< ScanType Type > +struct Scan< Devices::Sequential, Type > +{ + /** + * \brief Computes scan (prefix sum) sequentially. + * + * \tparam Vector type vector being used for the scan. + * \tparam Reduction lambda function defining the reduction operation + * + * \param v input vector, the result of scan is stored in the same vector + * \param begin the first element in the array to be scanned + * \param end the last element in the array to be scanned + * \param reduction lambda function implementing the reduction operation + * \param zero is the idempotent element for the reduction operation, i.e. element which + * does not change the result of the reduction. + * + * The reduction lambda function takes two variables which are supposed to be reduced: + * + * ``` + * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... 
}; + * ``` + * + * \par Example + * + * \include ReductionAndScan/ScanExample.cpp + * + * \par Output + * + * \include ScanExample.out + */ + template< typename Vector, + typename Reduction > + static void + perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); + + template< typename Vector, + typename Reduction > + static auto + performFirstPhase( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); + + template< typename Vector, + typename BlockShifts, + typename Reduction > + static void + performSecondPhase( Vector& v, + const BlockShifts& blockShifts, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType shift ); +}; + template< ScanType Type > struct Scan< Devices::Host, Type > { /** - * \brief Computes scan (prefix sum) on CPU. - * + * \brief Computes scan (prefix sum) using OpenMP. + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation - * + * * \param v input vector, the result of scan is stored in the same vector * \param begin the first element in the array to be scanned * \param end the last element in the array to be scanned @@ -162,10 +222,10 @@ struct Scan< Devices::Cuda, Type > { /** * \brief Computes scan (prefix sum) on GPU. - * + * * \tparam Vector type vector being used for the scan. 
* \tparam Reduction lambda function defining the reduction operation - * + * * \param v input vector, the result of scan is stored in the same vector * \param begin the first element in the array to be scanned * \param end the last element in the array to be scanned @@ -217,16 +277,60 @@ struct Scan< Devices::Cuda, Type > const typename Vector::RealType shift ); }; +template< ScanType Type > +struct SegmentedScan< Devices::Sequential, Type > +{ + /** + * \brief Computes segmented scan (prefix sum) sequentially. + * + * \tparam Vector type vector being used for the scan. + * \tparam Reduction lambda function defining the reduction operation + * \tparam Flags array type containing zeros and ones defining the segments begining + * + * \param v input vector, the result of scan is stored in the same vector + * \param flags is an array with zeros and ones defining the segments begining + * \param begin the first element in the array to be scanned + * \param end the last element in the array to be scanned + * \param reduction lambda function implementing the reduction operation + * \param zero is the idempotent element for the reduction operation, i.e. element which + * does not change the result of the reduction. + * + * The reduction lambda function takes two variables which are supposed to be reduced: + * + * ``` + * auto reduction = [] __cuda_callable__ ( const Result& a, const Result& b ) { return ... }; + * ``` + * + * \par Example + * + * \include ReductionAndScan/SegmentedScanExample.cpp + * + * \par Output + * + * \include SegmentedScanExample.out + */ + template< typename Vector, + typename Reduction, + typename Flags > + static void + perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ); +}; + template< ScanType Type > struct SegmentedScan< Devices::Host, Type > { /** - * \brief Computes segmented scan (prefix sum) on CPU. 
- * + * \brief Computes segmented scan (prefix sum) using OpenMP. + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation * \tparam Flags array type containing zeros and ones defining the segments begining - * + * * \param v input vector, the result of scan is stored in the same vector * \param flags is an array with zeros and ones defining the segments begining * \param begin the first element in the array to be scanned @@ -266,11 +370,11 @@ struct SegmentedScan< Devices::Cuda, Type > { /** * \brief Computes segmented scan (prefix sum) on GPU. - * + * * \tparam Vector type vector being used for the scan. * \tparam Reduction lambda function defining the reduction operation * \tparam Flags array type containing zeros and ones defining the segments begining - * + * * \param v input vector, the result of scan is stored in the same vector * \param flags is an array with zeros and ones defining the segments begining * \param begin the first element in the array to be scanned @@ -292,7 +396,7 @@ struct SegmentedScan< Devices::Cuda, Type > * \par Output * * \include SegmentedScanExample.out - * + * * **Note: Segmented scan is not implemented for CUDA yet.** */ template< typename Vector, @@ -308,7 +412,6 @@ struct SegmentedScan< Devices::Cuda, Type > }; } // namespace Algorithms -} // namespace Containers } // namespace TNL -#include <TNL/Containers/Algorithms/Scan.hpp> +#include <TNL/Algorithms/Scan.hpp> diff --git a/src/TNL/Containers/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp similarity index 77% rename from src/TNL/Containers/Algorithms/Scan.hpp rename to src/TNL/Algorithms/Scan.hpp index d7d2b181150344b44fa4403c26c6c5f5ba3b1eb3..7b6d31ece513144c5b0cec9947a232b940fb5e30 100644 --- a/src/TNL/Containers/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -17,14 +17,85 @@ #include <TNL/Assert.h> #include <TNL/Containers/Array.h> #include <TNL/Containers/StaticArray.h> -#include 
<TNL/Containers/Algorithms/CudaScanKernel.h> +#include <TNL/Algorithms/CudaScanKernel.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Exceptions/NotImplementedError.h> namespace TNL { -namespace Containers { namespace Algorithms { +template< ScanType Type > + template< typename Vector, + typename Reduction > +void +Scan< Devices::Sequential, Type >:: +perform( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ + // sequential prefix-sum does not need a second phase + performFirstPhase( v, begin, end, reduction, zero ); +} + +template< ScanType Type > + template< typename Vector, + typename Reduction > +auto +Scan< Devices::Sequential, Type >:: +performFirstPhase( Vector& v, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ + using RealType = typename Vector::RealType; + using IndexType = typename Vector::IndexType; + + // FIXME: StaticArray does not have getElement() which is used in DistributedScan +// return Containers::StaticArray< 1, RealType > block_sums; + Containers::Array< RealType, Devices::Host > block_sums( 1 ); + block_sums[ 0 ] = zero; + + if( Type == ScanType::Inclusive ) { + for( IndexType i = begin + 1; i < end; i++ ) + v[ i ] = reduction( v[ i ], v[ i - 1 ] ); + block_sums[ 0 ] = v[ end - 1 ]; + } + else // Exclusive prefix sum + { + RealType aux = zero; + for( IndexType i = begin; i < end; i++ ) { + const RealType x = v[ i ]; + v[ i ] = aux; + aux = reduction( aux, x ); + } + block_sums[ 0 ] = aux; + } + + return block_sums; +} + +template< ScanType Type > + template< typename Vector, + typename BlockShifts, + typename Reduction > +void +Scan< Devices::Sequential, Type >:: +performSecondPhase( Vector& v, + const BlockShifts& blockShifts, + const typename Vector::IndexType begin, + const typename Vector::IndexType 
end, + const Reduction& reduction, + const typename Vector::RealType shift ) +{ + using IndexType = typename Vector::IndexType; + + for( IndexType i = begin; i < end; i++ ) + v[ i ] = reduction( v[ i ], shift ); +} + template< ScanType Type > template< typename Vector, typename Reduction > @@ -40,8 +111,7 @@ perform( Vector& v, const auto blockShifts = performFirstPhase( v, begin, end, reduction, zero ); performSecondPhase( v, blockShifts, begin, end, reduction, zero ); #else - // sequential prefix-sum does not need a second phase - performFirstPhase( v, begin, end, reduction, zero ); + Scan< Devices::Sequential, Type >::perform( v, begin, end, reduction, zero ); #endif } @@ -56,12 +126,12 @@ performFirstPhase( Vector& v, const Reduction& reduction, const typename Vector::RealType zero ) { +#ifdef HAVE_OPENMP using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; -#ifdef HAVE_OPENMP const int threads = Devices::Host::getMaxThreadsCount(); - Array< RealType, Devices::Host > block_sums( threads + 1 ); + Containers::Array< RealType > block_sums( threads + 1 ); block_sums[ 0 ] = zero; #pragma omp parallel num_threads(threads) @@ -99,28 +169,7 @@ performFirstPhase( Vector& v, // block_sums now contains shift values for each block - to be used in the second phase return block_sums; #else - // FIXME: StaticArray does not have getElement() which is used in DistributedScan -// return StaticArray< 1, RealType > block_sums; - Array< RealType, Devices::Host > block_sums( 1 ); - block_sums[ 0 ] = zero; - - if( Type == ScanType::Inclusive ) { - for( IndexType i = begin + 1; i < end; i++ ) - v[ i ] = reduction( v[ i ], v[ i - 1 ] ); - block_sums[ 0 ] = v[ end - 1 ]; - } - else // Exclusive prefix sum - { - RealType aux = zero; - for( IndexType i = begin; i < end; i++ ) { - const RealType x = v[ i ]; - v[ i ] = aux; - aux = reduction( aux, x ); - } - block_sums[ 0 ] = aux; - } - - return block_sums; + return Scan< Devices::Sequential, Type 
>::performFirstPhase( v, begin, end, reduction, zero ); #endif } @@ -137,10 +186,10 @@ performSecondPhase( Vector& v, const Reduction& reduction, const typename Vector::RealType shift ) { +#ifdef HAVE_OPENMP using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; -#ifdef HAVE_OPENMP const int threads = blockShifts.getSize() - 1; // launch exactly the same number of threads as in the first phase @@ -155,8 +204,7 @@ performSecondPhase( Vector& v, v[ i ] = reduction( v[ i ], offset ); } #else - for( IndexType i = begin; i < end; i++ ) - v[ i ] = reduction( v[ i ], shift ); + Scan< Devices::Sequential, Type >::performSecondPhase( v, blockShifts, begin, end, reduction, shift ); #endif } @@ -246,7 +294,7 @@ template< ScanType Type > typename Reduction, typename Flags > void -SegmentedScan< Devices::Host, Type >:: +SegmentedScan< Devices::Sequential, Type >:: perform( Vector& v, Flags& flags, const typename Vector::IndexType begin, @@ -257,7 +305,6 @@ perform( Vector& v, using RealType = typename Vector::RealType; using IndexType = typename Vector::IndexType; - // TODO: parallelize with OpenMP if( Type == ScanType::Inclusive ) { for( IndexType i = begin + 1; i < end; i++ ) @@ -279,6 +326,27 @@ perform( Vector& v, } } +template< ScanType Type > + template< typename Vector, + typename Reduction, + typename Flags > +void +SegmentedScan< Devices::Host, Type >:: +perform( Vector& v, + Flags& flags, + const typename Vector::IndexType begin, + const typename Vector::IndexType end, + const Reduction& reduction, + const typename Vector::RealType zero ) +{ +#ifdef HAVE_OPENMP + // TODO: parallelize with OpenMP + SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero ); +#else + SegmentedScan< Devices::Sequential, Type >::perform( v, flags, begin, end, reduction, zero ); +#endif +} + template< ScanType Type > template< typename Vector, typename Reduction, @@ -296,12 +364,11 @@ perform( Vector& v, using RealType = 
typename Vector::RealType; using IndexType = typename Vector::IndexType; - throw Exceptions::NotImplementedError( "Segmented prefix sum is not implemented for CUDA." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) is not implemented for CUDA." ); #else throw Exceptions::CudaSupportMissing(); #endif } } // namespace Algorithms -} // namespace Containers } // namespace TNL diff --git a/src/TNL/StaticFor.h b/src/TNL/Algorithms/StaticFor.h similarity index 95% rename from src/TNL/StaticFor.h rename to src/TNL/Algorithms/StaticFor.h index 990036dfc0090708851468e03e991a30a07cc835..c7404545840143bd053ed371c5813a7a0feaa185 100644 --- a/src/TNL/StaticFor.h +++ b/src/TNL/Algorithms/StaticFor.h @@ -10,9 +10,10 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { +namespace Algorithms { // Manual unrolling does not make sense for loops with a large iterations // count. For a very large iterations count it would trigger the compiler's @@ -57,4 +58,5 @@ struct StaticFor< Begin, End, false > } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/StaticVectorFor.h b/src/TNL/Algorithms/StaticVectorFor.h similarity index 97% rename from src/TNL/StaticVectorFor.h rename to src/TNL/Algorithms/StaticVectorFor.h index 59af0fcb8256619d41014be4fa21023fee4679e2..664f97aed95651249447788d62a6f19be8855bd6 100644 --- a/src/TNL/StaticVectorFor.h +++ b/src/TNL/Algorithms/StaticVectorFor.h @@ -13,6 +13,7 @@ #include <TNL/Containers/StaticVector.h> namespace TNL { +namespace Algorithms { struct StaticVectorFor { @@ -48,4 +49,5 @@ struct StaticVectorFor } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/TemplateStaticFor.h b/src/TNL/Algorithms/TemplateStaticFor.h similarity index 97% rename from src/TNL/TemplateStaticFor.h rename to src/TNL/Algorithms/TemplateStaticFor.h index 88ad764fd9b78d0348469a115ee1cb83ecb7993b..753ad9b2618b2704292517e9b74ffff7192d22b7 100644 --- 
a/src/TNL/TemplateStaticFor.h +++ b/src/TNL/Algorithms/TemplateStaticFor.h @@ -13,9 +13,10 @@ #include <utility> #include <type_traits> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { +namespace Algorithms { namespace detail { template< typename IndexType, @@ -89,4 +90,5 @@ struct TemplateStaticFor } }; +} // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Allocators/Cuda.h b/src/TNL/Allocators/Cuda.h index 74ebb840432136d9033a17a86684607098a80d86..1b648f1ce3818978e086f26e64128536f40a8806 100644 --- a/src/TNL/Allocators/Cuda.h +++ b/src/TNL/Allocators/Cuda.h @@ -12,7 +12,9 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Exceptions/CudaBadAlloc.h> +#include <TNL/Exceptions/CudaSupportMissing.h> +#include <TNL/Cuda/CheckDevice.h> namespace TNL { namespace Allocators { diff --git a/src/TNL/Allocators/CudaHost.h b/src/TNL/Allocators/CudaHost.h index 284c91fe9b8dbc7abe8e3d4685ef1d7551d19a89..9047e0b9af632b9f6fd466352d2cd3659f67210a 100644 --- a/src/TNL/Allocators/CudaHost.h +++ b/src/TNL/Allocators/CudaHost.h @@ -12,7 +12,9 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Exceptions/CudaBadAlloc.h> +#include <TNL/Exceptions/CudaSupportMissing.h> +#include <TNL/Cuda/CheckDevice.h> namespace TNL { namespace Allocators { diff --git a/src/TNL/Allocators/CudaManaged.h b/src/TNL/Allocators/CudaManaged.h index db29f86cb618bf79e4f1c0fa0ac1ad2750d476bc..bb878ca66bef97491c6db407128c7c3322fdce7a 100644 --- a/src/TNL/Allocators/CudaManaged.h +++ b/src/TNL/Allocators/CudaManaged.h @@ -12,7 +12,9 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Exceptions/CudaBadAlloc.h> +#include <TNL/Exceptions/CudaSupportMissing.h> +#include <TNL/Cuda/CheckDevice.h> namespace TNL { namespace Allocators { diff --git a/src/TNL/Allocators/Default.h b/src/TNL/Allocators/Default.h index 6906a905c3a82d3e2400c4ba6a767848bf1be061..109539d0c92def3628d288b97b7ea82681b5df8b 100644 --- 
a/src/TNL/Allocators/Default.h +++ b/src/TNL/Allocators/Default.h @@ -14,10 +14,9 @@ #include <TNL/Allocators/Host.h> #include <TNL/Allocators/Cuda.h> -#include <TNL/Allocators/MIC.h> +#include <TNL/Devices/Sequential.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> namespace TNL { namespace Allocators { @@ -29,28 +28,28 @@ namespace Allocators { template< typename Device > struct Default; -//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host. +//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Sequential. template<> -struct Default< Devices::Host > +struct Default< Devices::Sequential > { template< typename T > using Allocator = Allocators::Host< T >; }; -//! Sets \ref Allocators::Cuda as the default allocator for \ref Devices::Cuda. +//! Sets \ref Allocators::Host as the default allocator for \ref Devices::Host. template<> -struct Default< Devices::Cuda > +struct Default< Devices::Host > { template< typename T > - using Allocator = Allocators::Cuda< T >; + using Allocator = Allocators::Host< T >; }; -//! Sets \ref Allocators::MIC as the default allocator for \ref Devices::MIC. +//! Sets \ref Allocators::Cuda as the default allocator for \ref Devices::Cuda. template<> -struct Default< Devices::MIC > +struct Default< Devices::Cuda > { template< typename T > - using Allocator = Allocators::MIC< T >; + using Allocator = Allocators::Cuda< T >; }; } // namespace Allocators diff --git a/src/TNL/Allocators/MIC.h b/src/TNL/Allocators/MIC.h deleted file mode 100644 index c3599f449cd85f9f83c0ef0e5974bb015d04a6ef..0000000000000000000000000000000000000000 --- a/src/TNL/Allocators/MIC.h +++ /dev/null @@ -1,100 +0,0 @@ -/*************************************************************************** - MIC.h - description - ------------------- - begin : Jul 2, 2019 - copyright : (C) 2019 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include <TNL/Devices/MIC.h> -#include <TNL/Exceptions/MICSupportMissing.h> - -namespace TNL { -namespace Allocators { - -/** - * \brief Allocator for the MIC device memory space. - */ -template< class T > -struct MIC -{ - using value_type = T; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - MIC() = default; - MIC( const MIC& ) = default; - MIC( MIC&& ) = default; - - MIC& operator=( const MIC& ) = default; - MIC& operator=( MIC&& ) = default; - - template< class U > - MIC( const MIC< U >& ) - {} - - template< class U > - MIC( MIC< U >&& ) - {} - - template< class U > - MIC& operator=( const MIC< U >& ) - { - return *this; - } - - template< class U > - MIC& operator=( MIC< U >&& ) - { - return *this; - } - - value_type* allocate( size_type size ) - { -#ifdef HAVE_MIC - Devices::MICHider<void> hide_ptr; - #pragma offload target(mic) out(hide_ptr) in(size) - { - hide_ptr.pointer = malloc(size * sizeof(value_type)); - } - return hide_ptr.pointer; -#else - throw Exceptions::MICSupportMissing(); -#endif - } - - void deallocate(value_type* ptr, size_type) - { -#ifdef HAVE_MIC - Devices::MICHider<void> hide_ptr; - hide_ptr.pointer=ptr; - #pragma offload target(mic) in(hide_ptr) - { - free(hide_ptr.pointer); - } -#else - throw Exceptions::MICSupportMissing(); -#endif - } -}; - -template<class T1, class T2> -bool operator==(const MIC<T1>&, const MIC<T2>&) -{ - return true; -} - -template<class T1, class T2> -bool operator!=(const MIC<T1>& lhs, const MIC<T2>& rhs) -{ - return !(lhs == rhs); -} - -} // namespace Allocators -} // namespace TNL diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index 27f3b11b28ed46a9741c3593c573a243f1e0a81d..df862956219ac03e1d1e1fa27e1c67a0e1035ad5 100644 --- a/src/TNL/Assert.h +++ 
b/src/TNL/Assert.h @@ -38,7 +38,7 @@ #define TNL_NVCC_HD_WARNING_DISABLE #endif -#if defined(NDEBUG) || defined(HAVE_MIC) +#ifdef NDEBUG // empty macros for optimized build /** @@ -120,7 +120,7 @@ #include <iostream> #include <stdio.h> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Debugging/StackBacktrace.h> namespace TNL { diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h index ca36f9676e34a76f8427c39bbb190954e701ea5d..e84236287cb28f7cfa0823154201475f53c43be8 100644 --- a/src/TNL/Atomic.h +++ b/src/TNL/Atomic.h @@ -16,13 +16,37 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/param-types.h> + +// double-precision atomicAdd function for Maxwell and older GPUs +// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions +#ifdef HAVE_CUDA +#if __CUDA_ARCH__ < 600 +namespace { + __device__ double atomicAdd(double* address, double val) + { + unsigned long long int* address_as_ull = + (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); + } +} // namespace +#endif +#endif namespace TNL { template< typename T, typename Device > -class Atomic -{}; +class Atomic; template< typename T > class Atomic< T, Devices::Host > @@ -48,14 +72,6 @@ public: return *this; } - // just for compatibility with TNL::Containers::Array... 
- static String getType() - { - return "Atomic< " + - TNL::getType< T >() + ", " + - Devices::Host::getDeviceType() + " >"; - } - // CAS loops for updating maximum and minimum // reference: https://stackoverflow.com/a/16190791 T fetch_max( T value ) noexcept @@ -120,14 +136,6 @@ public: return *this; } - // just for compatibility with TNL::Containers::Array... - static String getType() - { - return "Atomic< " + - TNL::getType< T >() + ", " + - Devices::Cuda::getDeviceType() + " >"; - } - bool is_lock_free() const noexcept { return true; diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h index 926fa329a6e88b3d84406f464aa3e82057e3ef24..0aa14a9ece5d518ce5e142898cea56593d375189 100644 --- a/src/TNL/Communicators/MpiCommunicator.h +++ b/src/TNL/Communicators/MpiCommunicator.h @@ -24,7 +24,7 @@ #include <unistd.h> // getpid #ifdef HAVE_CUDA - #include <TNL/Devices/Cuda.h> + #include <TNL/Cuda/CheckDevice.h> typedef struct __attribute__((__packed__)) { char name[MPI_MAX_PROCESSOR_NAME]; diff --git a/src/TNL/Config/ConfigDescription.h b/src/TNL/Config/ConfigDescription.h index dc32c16840d1b0d8f1e7d479942de1e32f01fa19..648db1d445de1ef7362e0265927226e4ec734887 100644 --- a/src/TNL/Config/ConfigDescription.h +++ b/src/TNL/Config/ConfigDescription.h @@ -14,11 +14,9 @@ #include <string> #include <vector> #include <memory> -#include "make_unique.h" #include <TNL/Assert.h> #include <TNL/String.h> -#include <TNL/param-types.h> #include <TNL/Config/ConfigEntryType.h> #include <TNL/Config/ConfigEntry.h> #include <TNL/Config/ConfigEntryList.h> @@ -144,7 +142,7 @@ public: TNL_ASSERT_TRUE( this->currentEntry, "there is no current entry" ); if( isCurrentEntryList ) { ConfigEntryList< EntryType >& entry = dynamic_cast< ConfigEntryList< EntryType >& >( *currentEntry ); - entry.getEnumValues().push_back( entryEnum ); + entry.getEnumValues().push_back( entryEnum ); } else { ConfigEntry< EntryType >& entry = dynamic_cast< ConfigEntry< EntryType >& 
>( *currentEntry ); @@ -218,7 +216,7 @@ public: std::cerr << "Asking for the default value of unknown parameter." << std::endl; return nullptr; } - + //! Returns zero pointer if there is no default value template< class T > T* getDefaultValue( const String& name ) @@ -256,55 +254,59 @@ public: if( entries[ i ]->hasDefaultValue && ! parameter_container.checkParameter( entry_name ) ) { - if( entries[ i ]->getEntryType() == "String" ) + if( entries[ i ]->getEntryType() == "TNL::String" ) { ConfigEntry< String >& entry = dynamic_cast< ConfigEntry< String >& >( *entries[ i ] ); parameter_container.addParameter< String >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "bool" ) + else if( entries[ i ]->getEntryType() == "bool" ) { ConfigEntry< bool >& entry = dynamic_cast< ConfigEntry< bool >& >( *entries[ i ] ); parameter_container.addParameter< bool >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "int" ) + else if( entries[ i ]->getEntryType() == "int" ) { ConfigEntry< int >& entry = dynamic_cast< ConfigEntry< int >& >( *entries[ i ] ); parameter_container.addParameter< int >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "double" ) + else if( entries[ i ]->getEntryType() == "double" ) { ConfigEntry< double >& entry = dynamic_cast< ConfigEntry< double >& >( *entries[ i ] ); parameter_container.addParameter< double >( entry_name, entry.defaultValue ); continue; } - - if( entries[ i ]->getEntryType() == "ConfigEntryList< String >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< TNL::String >" ) { ConfigEntryList< String >& entry = dynamic_cast< ConfigEntryList< String >& >( *entries[ i ] ); parameter_container.addList< String >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< bool >" ) { ConfigEntryList< bool >& entry = 
dynamic_cast< ConfigEntryList< bool >& >( *entries[ i ] ); parameter_container.addList< bool >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< int >" ) { ConfigEntryList< int >& entry = dynamic_cast< ConfigEntryList< int >& >( *entries[ i ] ); parameter_container.addList< int >( entry_name, entry.defaultValue ); continue; } - if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" ) + else if( entries[ i ]->getEntryType() == "ConfigEntryList< double >" ) { ConfigEntryList< double >& entry = dynamic_cast< ConfigEntryList< double >& >( *entries[ i ] ); parameter_container.addList< double >( entry_name, entry.defaultValue ); continue; } + else + { + throw std::runtime_error( "Method ConfigDescription::addMissingEntries encountered " + "unsupported entry type: " + entries[ i ]->getEntryType() ); + } } } } diff --git a/src/TNL/Config/ConfigEntry.h b/src/TNL/Config/ConfigEntry.h index 1b56574cc3983a3425ab023e7466c699fbe9f982..370366e5ea3c7e906f948417d66c32de41b01aea 100644 --- a/src/TNL/Config/ConfigEntry.h +++ b/src/TNL/Config/ConfigEntry.h @@ -12,6 +12,7 @@ #include <vector> +#include <TNL/TypeInfo.h> #include <TNL/Config/ConfigEntryBase.h> namespace TNL { diff --git a/src/TNL/Config/ConfigEntryList.h b/src/TNL/Config/ConfigEntryList.h index 50284e37c1195916ab5c2ccfd3f72dd4d6ed7ed8..86f2642349ad470f2a8fd268ad117b1d3baf268a 100644 --- a/src/TNL/Config/ConfigEntryList.h +++ b/src/TNL/Config/ConfigEntryList.h @@ -12,6 +12,7 @@ #include <vector> +#include <TNL/TypeInfo.h> #include <TNL/Config/ConfigEntryBase.h> namespace TNL { diff --git a/src/TNL/Config/ParameterContainer.h b/src/TNL/Config/ParameterContainer.h index b298234d836188e316d93b87bcb0bf69e09afb1d..734db27f512a11124ef512b55ce578301d4c790c 100644 --- a/src/TNL/Config/ParameterContainer.h +++ b/src/TNL/Config/ParameterContainer.h @@ -12,9 +12,9 @@ #include <vector> #include <memory> 
-#include "make_unique.h" -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> +#include <TNL/String.h> //#include <TNL/Debugging/StackBacktrace.h> namespace TNL { @@ -63,7 +63,7 @@ public: parameters.push_back( std::make_unique< Parameter< T > >( name, TNL::getType< T >(), value ) ); return true; } - + /** * \brief Adds new parameter to the ParameterContainer. * @@ -73,7 +73,7 @@ public: */ template< class T > bool addList( const String& name, - const T& value ) + const T& value ) { std::vector< T > v; v.push_back( value ); diff --git a/src/TNL/Config/make_unique.h b/src/TNL/Config/make_unique.h deleted file mode 100644 index 4a4078a028e2c1cb9feec357080d20bc3f05454b..0000000000000000000000000000000000000000 --- a/src/TNL/Config/make_unique.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -// std::make_unique does not exist until C++14 -// https://stackoverflow.com/a/9657991 -#if __cplusplus < 201402L -#include <memory> - -namespace std { - template<typename T, typename ...Args> - std::unique_ptr<T> make_unique( Args&& ...args ) - { - return std::unique_ptr<T>( new T( std::forward<Args>(args)... ) ); - } -} -#endif diff --git a/src/TNL/Config/parseCommandLine.h b/src/TNL/Config/parseCommandLine.h index 34a555f2890ee6995e6ea464c00410f83ce1ccf3..8993de027b2d4149112a4d74098478d3bdee3268 100644 --- a/src/TNL/Config/parseCommandLine.h +++ b/src/TNL/Config/parseCommandLine.h @@ -13,7 +13,6 @@ #include <cstring> #include <string> -//#include <TNL/Object.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Config/ParameterContainer.h> @@ -51,7 +50,7 @@ parseCommandLine( int argc, char* argv[], int i; bool parse_error( false ); - for( i = 1; i < argc; i ++ ) + for( i = 1; i < argc; i++ ) { const char* _option = argv[ i ]; if( _option[ 0 ] != '-' ) @@ -75,7 +74,7 @@ parseCommandLine( int argc, char* argv[], else { const String& entryType = entry->getEntryType(); - const char* value = argv[ ++ i ]; + const char* value = argv[ ++i ]; if( ! 
value ) { std::cerr << "Missing value for the parameter " << option << "." << std::endl; @@ -97,11 +96,11 @@ parseCommandLine( int argc, char* argv[], while( i < argc && ( ( argv[ i ] )[ 0 ] != '-' || ( atof( argv[ i ] ) < 0.0 && ( parsedEntryType[ 1 ] == "int" || parsedEntryType[ 1 ] == "double" ) ) ) ) { const char* value = argv[ i ++ ]; - if( parsedEntryType[ 1 ] == "String" ) + if( parsedEntryType[ 1 ] == "TNL::String" ) { string_list.push_back( String( value ) ); } - if( parsedEntryType[ 1 ] == "bool" ) + else if( parsedEntryType[ 1 ] == "bool" ) { const int v = matob( value ); if( v == -1 ) @@ -111,14 +110,19 @@ parseCommandLine( int argc, char* argv[], } else bool_list.push_back( v ); } - if( parsedEntryType[ 1 ] == "int" ) + else if( parsedEntryType[ 1 ] == "int" ) { integer_list.push_back( atoi( value ) ); } - if( parsedEntryType[ 1 ] == "double" ) + else if( parsedEntryType[ 1 ] == "double" ) { real_list.push_back( atof( value ) ); } + else + { + // this will not happen if all entry types are handled above + throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType ); + } } if( string_list.size() ) parameters.addParameter< std::vector< String > >( option, string_list ); @@ -133,14 +137,14 @@ parseCommandLine( int argc, char* argv[], } else { - if( parsedEntryType[ 0 ] == "String" ) + if( parsedEntryType[ 0 ] == "TNL::String" ) { if( ! ( ( ConfigEntry< String >* ) entry )->checkValue( value ) ) return false; parameters.addParameter< String >( option, value ); continue; } - if( parsedEntryType[ 0 ] == "bool" ) + else if( parsedEntryType[ 0 ] == "bool" ) { const int v = matob( value ); if( v == -1 ) @@ -151,7 +155,7 @@ parseCommandLine( int argc, char* argv[], else parameters.addParameter< bool >( option, v ); continue; } - if( parsedEntryType[ 0 ] == "int" ) + else if( parsedEntryType[ 0 ] == "int" ) { /*if( ! 
std::isdigit( value ) ) //TODO: Check for real number { @@ -163,7 +167,7 @@ parseCommandLine( int argc, char* argv[], return false; parameters.addParameter< int >( option, atoi( value ) ); } - if( parsedEntryType[ 0 ] == "double" ) + else if( parsedEntryType[ 0 ] == "double" ) { /*if( ! std::isdigit( value ) ) //TODO: Check for real number { @@ -175,6 +179,11 @@ parseCommandLine( int argc, char* argv[], return false; parameters.addParameter< double >( option, atof( value ) ); } + else + { + // this will not happen if all entry types are handled above + throw std::runtime_error( "Function parseCommandLine encountered unsupported entry type: " + entryType ); + } } } } diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp deleted file mode 100644 index b81fd7f2b7e5f0c11211ef8263da89d00cf243cf..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp +++ /dev/null @@ -1,333 +0,0 @@ -/*************************************************************************** - ArrayOperationsCuda.hpp - description - ------------------- - begin : Jul 16, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <iostream> -#include <memory> -#include <stdexcept> - -#include <TNL/Math.h> -#include <TNL/ParallelFor.h> -#include <TNL/Exceptions/CudaSupportMissing.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/Reduction.h> - -namespace TNL { -namespace Containers { -namespace Algorithms { - -template< typename Element > -void -ArrayOperations< Devices::Cuda >:: -setElement( Element* data, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." 
); - ArrayOperations< Devices::Cuda >::set( data, value, 1 ); -} - -template< typename Element > -Element -ArrayOperations< Devices::Cuda >:: -getElement( const Element* data ) -{ - TNL_ASSERT_TRUE( data, "Attempted to get data through a nullptr." ); - Element result; - ArrayOperations< Devices::Host, Devices::Cuda >::copy< Element, Element, int >( &result, data, 1 ); - return result; -} - -template< typename Element, typename Index > -void -ArrayOperations< Devices::Cuda >:: -set( Element* data, - const Element& value, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( data, "Attempted to set data through a nullptr." ); - auto kernel = [data, value] __cuda_callable__ ( Index i ) - { - data[ i ] = value; - }; - ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); -} - -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Cuda >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); - if( std::is_same< DestinationElement, SourceElement >::value ) - { -#ifdef HAVE_CUDA - cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyDeviceToDevice ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif - } - else - { - auto kernel = [destination, source] __cuda_callable__ ( Index i ) - { - destination[ i ] = source[ i ]; - }; - ParallelFor< Devices::Cuda >::exec( (Index) 0, size, kernel ); - } -} - -template< typename DestinationElement, - typename Index, - typename SourceIterator > -void -ArrayOperations< Devices::Cuda >:: -copyFromIterator( DestinationElement* destination, - Index destinationSize, - SourceIterator first, - SourceIterator last ) -{ - using BaseType = typename std::remove_cv< DestinationElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] }; - Index copiedElements = 0; - while( copiedElements < destinationSize && first != last ) { - Index i = 0; - while( i < Devices::Cuda::getGPUTransferBufferSize() && first != last ) - buffer[ i++ ] = *first++; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( &destination[ copiedElements ], buffer.get(), i ); - copiedElements += i; - } - if( first != last ) - throw std::length_error( "Source iterator is larger than the destination array." ); -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - if( size == 0 ) return true; - TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." 
); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return destination[ i ] == source[ i ]; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -containsValue( const Element* data, - const Index size, - const Element& value ) -{ - if( size == 0 ) return false; - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "" ); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_or<>{}, fetch, false ); -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::Cuda >:: -containsOnlyValue( const Element* data, - const Index size, - const Element& value ) -{ - if( size == 0 ) return false; - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, 0, "" ); - - auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return data[ i ] == value; }; - return Reduction< Devices::Cuda >::reduce( size, std::logical_and<>{}, fetch, true ); -} - - -/**** - * Operations CUDA -> Host - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Host, Devices::Cuda >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); -#ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) - { - if( cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." 
<< std::endl; - TNL_CHECK_CUDA_DEVICE; - } - else - { - using BaseType = typename std::remove_cv< SourceElement >::type; - std::unique_ptr< BaseType[] > buffer{ new BaseType[ Devices::Cuda::getGPUTransferBufferSize() ] }; - Index i( 0 ); - while( i < size ) - { - if( cudaMemcpy( (void*) buffer.get(), - (void*) &source[ i ], - TNL::min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - Index j( 0 ); - while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) - { - destination[ i + j ] = buffer[ j ]; - j++; - } - i += j; - } - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Host, Devices::Cuda >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - if( size == 0 ) return true; - /*** - * Here, destination is on host and source is on CUDA device. - */ - TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); -#ifdef HAVE_CUDA - std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] }; - Index compared( 0 ); - while( compared < size ) - { - Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); - if( cudaMemcpy( (void*) host_buffer.get(), - (void*) &source[ compared ], - transfer * sizeof( Element2 ), - cudaMemcpyDeviceToHost ) != cudaSuccess ) - std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - if( ! 
ArrayOperations< Devices::Host >::compare( &destination[ compared ], host_buffer.get(), transfer ) ) - return false; - compared += transfer; - } - return true; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -/**** - * Operations Host -> CUDA - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Cuda, Devices::Host >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - if( size == 0 ) return; - TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); - TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); -#ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) - { - if( cudaMemcpy( destination, - source, - size * sizeof( DestinationElement ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; - TNL_CHECK_CUDA_DEVICE; - } - else - { - std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; - Index i( 0 ); - while( i < size ) - { - Index j( 0 ); - while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) - { - buffer[ j ] = source[ i + j ]; - j++; - } - if( cudaMemcpy( (void*) &destination[ i ], - (void*) buffer.get(), - j * sizeof( DestinationElement ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - std::cerr << "Transfer of data from host to CUDA device failed." 
<< std::endl; - TNL_CHECK_CUDA_DEVICE; - i += j; - } - } -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Cuda, Devices::Host >:: -compare( const Element1* hostData, - const Element2* deviceData, - const Index size ) -{ - if( size == 0 ) return true; - TNL_ASSERT_TRUE( hostData, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_TRUE( deviceData, "Attempted to compare data through a nullptr." ); - TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); - return ArrayOperations< Devices::Host, Devices::Cuda >::compare( deviceData, hostData, size ); -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp deleted file mode 100644 index 4113bbcd90f0edce53d143cf65996a392c2a91b4..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/Algorithms/ArrayOperationsMIC.hpp +++ /dev/null @@ -1,429 +0,0 @@ -/*************************************************************************** - ArrayOperationsMIC_impl.h - description - ------------------- - begin : Mar 4, 2017 - copyright : (C) 2017 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Vit Hanousek - -#pragma once - -#include <iostream> - -#include <TNL/Math.h> -#include <TNL/Exceptions/MICSupportMissing.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/Reduction.h> -#include <TNL/Exceptions/NotImplementedError.h> - -namespace TNL { -namespace Containers { -namespace Algorithms { - -static constexpr std::size_t MIC_STACK_VAR_LIM = 5*1024*1024; - -template< typename Element > -void -ArrayOperations< Devices::MIC >:: -setElement( Element* 
data, - const Element& value ) -{ - TNL_ASSERT( data, ); - ArrayOperations< Devices::MIC >::set( data, value, 1 ); -} - -template< typename Element > -Element -ArrayOperations< Devices::MIC >:: -getElement( const Element* data ) -{ - TNL_ASSERT( data, ); - Element result; - ArrayOperations< Devices::Host, Devices::MIC >::copy< Element, Element, int >( &result, data, 1 ); - return result; -} - -template< typename Element, typename Index > -void -ArrayOperations< Devices::MIC >:: -set( Element* data, - const Element& value, - const Index size ) -{ - TNL_ASSERT( data, ); -#ifdef HAVE_MIC - Element tmp=value; - Devices::MICHider<Element> hide_ptr; - hide_ptr.pointer=data; - #pragma offload target(mic) in(hide_ptr,tmp,size) - { - Element * dst= hide_ptr.pointer; - for(int i=0;i<size;i++) - dst[i]=tmp; - } -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::MIC >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - #ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider<void> src_ptr; - src_ptr.pointer=(void*)source; - Devices::MICHider<void> dst_ptr; - dst_ptr.pointer=(void*)destination; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) - { - memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement)); - } - } - else - { - Devices::MICHider<const SourceElement> src_ptr; - src_ptr.pointer=source; - Devices::MICHider<DestinationElement> dst_ptr; - dst_ptr.pointer=destination; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) - { - for(int i=0;i<size;i++) - dst_ptr.pointer[i]=src_ptr.pointer[i]; - } - } - #else - throw Exceptions::MICSupportMissing(); - #endif -} - -template< typename DestinationElement, - typename Index, - typename SourceIterator > -void -ArrayOperations< 
Devices::MIC >:: -copyFromIterator( DestinationElement* destination, - Index destinationSize, - SourceIterator first, - SourceIterator last ) -{ - throw Exceptions::NotImplementedError(); -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::MIC >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); -#ifdef HAVE_MIC - if( std::is_same< Element1, Element2 >::value ) - { - Devices::MICHider<void> src_ptr; - src_ptr.pointer=(void*)source; - Devices::MICHider<void> dst_ptr; - dst_ptr.pointer=(void*)destination; - int ret=0; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) - { - ret=memcmp(dst_ptr.pointer,src_ptr.pointer,size*sizeof(Element1)); - } - if(ret==0) - return true; - } - else - { - Devices::MICHider<const Element2> src_ptr; - src_ptr.pointer=source; - Devices::MICHider<const Element1> dst_ptr; - dst_ptr.pointer=destination; - bool ret=false; - #pragma offload target(mic) in(src_ptr,dst_ptr,size) out(ret) - { - int i=0; - for(i=0;i<size;i++) - if(dst_ptr.pointer[i]!=src_ptr.pointer[i]) - break; - if(i==size) - ret=true; - else - ret=false; - } - return ret; - } - return false; -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::MIC >:: -containsValue( const Element* data, - const Index size, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); - TNL_ASSERT_GE( size, 0, "" ); -#ifdef HAVE_MIC - throw Exceptions::NotImplementedError(); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -template< typename Element, - typename Index > -bool -ArrayOperations< Devices::MIC >:: -containsOnlyValue( const Element* data, - const Index size, - const Element& value ) -{ - TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." 
); - TNL_ASSERT_GE( size, 0, "" ); -#ifdef HAVE_MIC - throw Exceptions::NotImplementedError(); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - - - -/**** - * Operations MIC -> Host - */ - -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::Host, Devices::MIC >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); -#ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider<void> src_ptr; - src_ptr.pointer=(void*)source; - - //JAKA KONSTANTA se vejde do stacku 5MB? - if(size<MIC_STACK_VAR_LIM) - { - uint8_t tmp[size*sizeof(SourceElement)]; - - #pragma offload target(mic) in(src_ptr,size) out(tmp) - { - memcpy((void*)&tmp,src_ptr.pointer,size*sizeof(SourceElement)); - } - - memcpy((void*)destination,(void*)&tmp,size*sizeof(SourceElement)); - } - else - { - //direct -- pomalejšà- uint8_t* tmp=(uint8_t*)destination; - #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size)) - { - memcpy((void*)tmp,src_ptr.pointer,size*sizeof(SourceElement)); - } - } - } - else - { - Devices::MICHider<const SourceElement> src_ptr; - src_ptr.pointer=source; - - if(size<MIC_STACK_VAR_LIM) - { - uint8_t tmp[size*sizeof(DestinationElement)]; - - #pragma offload target(mic) in(src_ptr,size) out(tmp) - { - DestinationElement *dst=(DestinationElement*)&tmp; - for(int i=0;i<size;i++) - dst[i]=src_ptr.pointer[i]; - } - - memcpy((void*)destination,(void*)&tmp,size*sizeof(DestinationElement)); - } - else - { - //direct pseudo heap-- pomalejšà- uint8_t* tmp=(uint8_t*)destination; - #pragma offload target(mic) in(src_ptr,size) out(tmp:length(size*sizeof(DestinationElement))) - { - DestinationElement *dst=(DestinationElement*)tmp; - for(int i=0;i<size;i++) - dst[i]=src_ptr.pointer[i]; - } - } - } -#else - throw Exceptions::MICSupportMissing(); -#endif -} - - -template< 
typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::Host, Devices::MIC >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - /*** - * Here, destination is on host and source is on MIC device. - */ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); -#ifdef HAVE_MIC - Index compared( 0 ); - Index transfer( 0 ); - std::size_t max_transfer=MIC_STACK_VAR_LIM/sizeof(Element2); - uint8_t host_buffer[max_transfer*sizeof(Element2)]; - - Devices::MICHider<const Element2> src_ptr; - - while( compared < size ) - { - transfer=min(size-compared,max_transfer); - src_ptr.pointer=source+compared; - #pragma offload target(mic) out(host_buffer) in(src_ptr,transfer) - { - memcpy((void*)&host_buffer,(void*)src_ptr.pointer,transfer*sizeof(Element2)); - } - if( ! ArrayOperations< Devices::Host >::compare( &destination[ compared ], (Element2*)&host_buffer, transfer ) ) - { - return false; - } - compared += transfer; - } - return true; -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -/**** - * Operations Host -> MIC - */ -template< typename DestinationElement, - typename SourceElement, - typename Index > -void -ArrayOperations< Devices::MIC, Devices::Host >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - TNL_ASSERT( destination, ); - TNL_ASSERT( source, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); -#ifdef HAVE_MIC - if( std::is_same< DestinationElement, SourceElement >::value ) - { - Devices::MICHider<void> dst_ptr; - dst_ptr.pointer=(void*)destination; - - //JAKA KONSTANTA se vejde do stacku 5MB? 
- if(size<MIC_STACK_VAR_LIM) - { - uint8_t tmp[size*sizeof(SourceElement)]; - memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement)); - - #pragma offload target(mic) in(dst_ptr,tmp,size) - { - memcpy(dst_ptr.pointer,(void*)&tmp,size*sizeof(SourceElement)); - } - } - else - { - //direct pseudo heap-- pomalejšà- uint8_t* tmp=(uint8_t*)source; - #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size)) - { - memcpy(dst_ptr.pointer,(void*)tmp,size*sizeof(SourceElement)); - } - } - } - else - { - Devices::MICHider<DestinationElement> dst_ptr; - dst_ptr.pointer=destination; - - if(size<MIC_STACK_VAR_LIM) - { - uint8_t tmp[size*sizeof(SourceElement)]; - memcpy((void*)&tmp,(void*)source,size*sizeof(SourceElement)); - - #pragma offload target(mic) in(dst_ptr,size,tmp) - { - SourceElement *src=(SourceElement*)&tmp; - for(int i=0;i<size;i++) - dst_ptr.pointer[i]=src[i]; - } - } - else - { - //direct pseudo heap-- pomalejšà- uint8_t* tmp=(uint8_t*)source; - #pragma offload target(mic) in(dst_ptr,size) in(tmp:length(size*sizeof(SourceElement))) - { - SourceElement *src=(SourceElement*)tmp; - for(int i=0;i<size;i++) - dst_ptr.pointer[i]=src[i]; - } - } - } -#else - throw Exceptions::MICSupportMissing(); -#endif -} - -template< typename Element1, - typename Element2, - typename Index > -bool -ArrayOperations< Devices::MIC, Devices::Host >:: -compare( const Element1* hostData, - const Element2* deviceData, - const Index size ) -{ - TNL_ASSERT( hostData, ); - TNL_ASSERT( deviceData, ); - TNL_ASSERT( size >= 0, std::cerr << "size = " << size ); - return ArrayOperations< Devices::Host, Devices::MIC >::compare( deviceData, hostData, size ); -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp b/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp deleted file mode 100644 index d84933bde61f61c10108c47e13ba994b3b1709b4..0000000000000000000000000000000000000000 --- 
a/src/TNL/Containers/Algorithms/ArrayOperationsStatic.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/*************************************************************************** - ArrayOperationsStatic.hpp - description - ------------------- - begin : Apr 8, 2019 - copyright : (C) 2019 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Containers/Algorithms/ArrayOperations.h> - -namespace TNL { -namespace Containers { -namespace Algorithms { - -template< typename Element > -__cuda_callable__ -void -ArrayOperations< void >:: -setElement( Element* data, - const Element& value ) -{ - *data = value; -} - -template< typename Element > -__cuda_callable__ -Element -ArrayOperations< void >:: -getElement( const Element* data ) -{ - return *data; -} - -template< typename Element, typename Index > -__cuda_callable__ -void -ArrayOperations< void >:: -set( Element* data, - const Element& value, - const Index size ) -{ - for( Index i = 0; i < size; i ++ ) - data[ i ] = value; -} - -template< typename DestinationElement, - typename SourceElement, - typename Index > -__cuda_callable__ -void -ArrayOperations< void >:: -copy( DestinationElement* destination, - const SourceElement* source, - const Index size ) -{ - for( Index i = 0; i < size; i ++ ) - destination[ i ] = source[ i ]; -} - -template< typename Element1, - typename Element2, - typename Index > -__cuda_callable__ -bool -ArrayOperations< void >:: -compare( const Element1* destination, - const Element2* source, - const Index size ) -{ - for( Index i = 0; i < size; i++ ) - if( ! 
( destination[ i ] == source[ i ] ) ) - return false; - return true; -} - -} // namespace Algorithms -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index d9a5e56d2291a1b8b9e6a13f06121c0203e859a0..45ef1e272e8affa96e6a77b5b9e74cec8a59b447 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -73,7 +73,6 @@ template< typename Value, class Array { public: - /** * \brief Type of elements stored in this array. */ @@ -98,16 +97,6 @@ class Array */ using AllocatorType = Allocator; - /** - * \brief Defines the same array type but allocated on host (CPU). - */ - using HostType = Array< Value, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same array type but allocated on CUDA device (GPU). - */ - using CudaType = Array< Value, TNL::Devices::Cuda, Index >; - /** * \brief Compatible ArrayView type. */ @@ -118,6 +107,15 @@ class Array */ using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >; + /** + * \brief A template which allows to quickly obtain an \ref Array type with changed template parameters. + */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Value > > + using Self = Array< _Value, _Device, _Index, _Allocator >; + /** * \brief Constructs an empty array with zero size. @@ -226,25 +224,15 @@ class Array */ AllocatorType getAllocator() const; - /** - * \brief Returns a \ref String representation of the array type in C++ style. - */ - static String getType(); - - /** - * \brief Returns a \ref String representation of the array type in C++ style. - */ - virtual String getTypeVirtual() const; - /** * \brief Returns a \ref String representation of the array type in C++ style, - * where device is always \ref Devices::Host. + * with a placeholder in place of \e Device and \e Allocator. 
*/ static String getSerializationType(); /** * \brief Returns a \ref String representation of the array type in C++ style, - * where device is always \ref Devices::Host. + * with a placeholder in place of \e Device and \e Allocator. */ virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 4a9c484a4c19d84d30ef5d6760fad0362b64d42b..24e3f8b43a024c8c8c3b87213a31886c595caceb 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -15,10 +15,9 @@ #include <TNL/Assert.h> #include <TNL/Math.h> -#include <TNL/param-types.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/ArrayIO.h> -#include <TNL/Containers/Algorithms/ArrayAssignment.h> +#include <TNL/TypeInfo.h> +#include <TNL/Containers/detail/ArrayIO.h> +#include <TNL/Containers/detail/ArrayAssignment.h> #include "Array.h" @@ -74,7 +73,7 @@ Array( Value* data, : allocator( allocator ) { this->setSize( size ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), data, size ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), data, size ); } template< typename Value, @@ -85,7 +84,7 @@ Array< Value, Device, Index, Allocator >:: Array( const Array< Value, Device, Index, Allocator >& array ) { this->setSize( array.getSize() ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); } template< typename Value, @@ -98,7 +97,7 @@ Array( const Array< Value, Device, Index, Allocator >& array, : allocator( allocator ) { this->setSize( array.getSize() ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), array.getData(), array.getSize() ); } template< typename Value, @@ -118,7 +117,7 @@ Array( const Array< 
Value, Device, Index, Allocator >& array, TNL_ASSERT_LE( begin + size, array.getSize(), "End of array is out of bounds." ); this->setSize( size ); - Algorithms::ArrayOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size ); + Algorithms::MemoryOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size ); } template< typename Value, @@ -135,7 +134,7 @@ Array( const std::initializer_list< InValue >& list, // Here we assume that the underlying array for std::initializer_list is // const T[N] as noted here: // https://en.cppreference.com/w/cpp/utility/initializer_list - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), &( *list.begin() ), list.size() ); } template< typename Value, @@ -149,7 +148,7 @@ Array( const std::list< InValue >& list, : allocator( allocator ) { this->setSize( list.size() ); - Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); + Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); } template< typename Value, @@ -163,7 +162,7 @@ Array( const std::vector< InValue >& vector, : allocator( allocator ) { this->setSize( vector.size() ); - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); } template< typename Value, @@ -177,31 +176,6 @@ getAllocator() const return allocator; } -template< typename Value, - typename Device, - typename Index, - typename Allocator > -String -Array< Value, Device, Index, Allocator >:: -getType() -{ - return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + - 
TNL::getType< Index >() + " >"; -} - -template< typename Value, - typename Device, - typename Index, - typename Allocator > -String -Array< Value, Device, Index, Allocator >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename Value, typename Device, typename Index, @@ -210,7 +184,7 @@ String Array< Value, Device, Index, Allocator >:: getSerializationType() { - return Algorithms::ArrayIO< Value, Device, Index >::getSerializationType(); + return detail::ArrayIO< Value, Device, Index >::getSerializationType(); } template< typename Value, @@ -510,7 +484,7 @@ setElement( const Index& i, const Value& x ) { TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::setElement( &( this->data[ i ] ), x ); + return Algorithms::MemoryOperations< Device >::setElement( &( this->data[ i ] ), x ); } template< typename Value, @@ -523,7 +497,7 @@ getElement( const Index& i ) const { TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." 
); - return Algorithms::ArrayOperations< Device >::getElement( & ( this->data[ i ] ) ); + return Algorithms::MemoryOperations< Device >::getElement( & ( this->data[ i ] ) ); } template< typename Value, @@ -566,7 +540,7 @@ operator=( const Array< Value, Device, Index, Allocator >& array ) if( this->getSize() != array.getSize() ) this->setLike( array ); if( this->getSize() > 0 ) - Algorithms::ArrayOperations< Device >:: + Algorithms::MemoryOperations< Device >:: copy( this->getData(), array.getData(), array.getSize() ); @@ -605,8 +579,8 @@ Array< Value, Device, Index, Allocator >& Array< Value, Device, Index, Allocator >:: operator=( const T& data ) { - Algorithms::ArrayAssignment< Array, T >::resize( *this, data ); - Algorithms::ArrayAssignment< Array, T >::assign( *this, data ); + detail::ArrayAssignment< Array, T >::resize( *this, data ); + detail::ArrayAssignment< Array, T >::assign( *this, data ); return *this; } @@ -620,7 +594,7 @@ Array< Value, Device, Index, Allocator >:: operator=( const std::list< InValue >& list ) { this->setSize( list.size() ); - Algorithms::ArrayOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); + Algorithms::MemoryOperations< Device >::copyFromIterator( this->getData(), this->getSize(), list.cbegin(), list.cend() ); return *this; } @@ -635,7 +609,7 @@ operator=( const std::vector< InValue >& vector ) { if( (std::size_t) this->getSize() != vector.size() ) this->setSize( vector.size() ); - Algorithms::ArrayOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); + Algorithms::MultiDeviceMemoryOperations< Device, Devices::Host >::copy( this->getData(), vector.data(), vector.size() ); return *this; } @@ -652,7 +626,7 @@ operator==( const ArrayT& array ) const return false; if( this->getSize() == 0 ) return true; - return Algorithms::ArrayOperations< Device, typename ArrayT::DeviceType >:: + return Algorithms::MultiDeviceMemoryOperations< Device, typename 
ArrayT::DeviceType >:: compare( this->getData(), array.getData(), array.getSize() ); @@ -683,7 +657,7 @@ setValue( const ValueType& v, TNL_ASSERT_TRUE( this->getData(), "Attempted to set a value of an empty array." ); if( end == 0 ) end = this->getSize(); - Algorithms::ArrayOperations< Device >::set( &this->getData()[ begin ], v, end - begin ); + Algorithms::MemoryOperations< Device >::set( &this->getData()[ begin ], v, end - begin ); } template< typename Value, @@ -715,7 +689,7 @@ containsValue( const ValueType& v, if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v ); + return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, v ); } template< typename Value, @@ -732,7 +706,7 @@ containsOnlyValue( const ValueType& v, if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v ); + return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, v ); } template< typename Value, @@ -785,7 +759,7 @@ std::ostream& operator<<( std::ostream& str, const Array< Value, Device, Index, template< typename Value, typename Device, typename Index, typename Allocator > File& operator<<( File& file, const Array< Value, Device, Index, Allocator >& array ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, Allocator >; saveObjectType( file, IO::getSerializationType() ); const Index size = array.getSize(); file.save( &size ); @@ -804,7 +778,7 @@ File& operator<<( File&& file, const Array< Value, Device, Index, Allocator >& a template< typename Value, typename Device, typename Index, typename Allocator > File& operator>>( File& file, Array< Value, Device, Index, Allocator >& array ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< 
Value, Index, Allocator >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." ); diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 066ada8f248baf20e94c3cf9c1f91e412ed9acc2..d51f151f772f3828dc7ad27ca13041d01730ce76 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -80,16 +80,6 @@ public: */ using IndexType = Index; - /** - * \brief Defines the same array type but allocated on host (CPU). - */ - using HostType = ArrayView< Value, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same array type but allocated on CUDA device (GPU). - */ - using CudaType = ArrayView< Value, TNL::Devices::Cuda, Index >; - /** * \brief Compatible ArrayView type. */ @@ -101,9 +91,13 @@ public: using ConstViewType = ArrayView< std::add_const_t< Value >, Device, Index >; /** - * \brief Returns a \ref String representation of the array view type. + * \brief A template which allows to quickly obtain an \ref ArrayView type with changed template parameters. */ - static String getType(); + template< typename _Value, + typename _Device = Device, + typename _Index = Index > + using Self = ArrayView< _Value, _Device, _Index >; + /** * \brief Constructs an empty array view. 
diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index ea7882516cd13647e66e971dbbec11179c1ea520..c3c39bc10be8dd846331d1086fc1d22b42b8c6c7 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -13,30 +13,19 @@ #include <iostream> #include <stdexcept> -#include <TNL/param-types.h> -#include <TNL/ParallelFor.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> -#include <TNL/Containers/Algorithms/ArrayIO.h> -#include <TNL/Containers/Algorithms/ArrayAssignment.h> +#include <TNL/TypeInfo.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> +#include <TNL/Containers/detail/ArrayIO.h> +#include <TNL/Containers/detail/ArrayAssignment.h> +#include <TNL/Allocators/Default.h> #include "ArrayView.h" namespace TNL { namespace Containers { -template< typename Value, - typename Device, - typename Index > -String -ArrayView< Value, Device, Index >:: -getType() -{ - return String( "Containers::ArrayView< " ) + ", " + - TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; -} - // explicit initialization by raw data pointer and size template< typename Value, typename Device, @@ -113,7 +102,7 @@ operator=( const ArrayView& view ) { TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." 
); if( getSize() > 0 ) - Algorithms::ArrayOperations< Device >::copy( getData(), view.getData(), getSize() ); + Algorithms::MemoryOperations< Device >::copy( getData(), view.getData(), getSize() ); return *this; } @@ -125,7 +114,7 @@ ArrayView< Value, Device, Index >& ArrayView< Value, Device, Index >:: operator=( const T& data ) { - Algorithms::ArrayAssignment< ArrayView, T >::assign( *this, data ); + detail::ArrayAssignment< ArrayView, T >::assign( *this, data ); return *this; } @@ -228,7 +217,7 @@ setElement( Index i, Value value ) { TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::setElement( &data[ i ], value ); + return Algorithms::MemoryOperations< Device >::setElement( &data[ i ], value ); } template< typename Value, @@ -240,7 +229,7 @@ getElement( Index i ) const { TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); - return Algorithms::ArrayOperations< Device >::getElement( &data[ i ] ); + return Algorithms::MemoryOperations< Device >::getElement( &data[ i ] ); } template< typename Value, @@ -280,7 +269,7 @@ operator==( const ArrayT& array ) const return false; if( this->getSize() == 0 ) return true; - return Algorithms::ArrayOperations< DeviceType, typename ArrayT::DeviceType >:: + return Algorithms::MultiDeviceMemoryOperations< DeviceType, typename ArrayT::DeviceType >:: compare( this->getData(), array.getData(), array.getSize() ); @@ -307,7 +296,7 @@ setValue( Value value, const Index begin, Index end ) TNL_ASSERT_GT( size, 0, "Attempted to set value to an empty array view." 
); if( end == 0 ) end = this->getSize(); - Algorithms::ArrayOperations< Device >::set( &getData()[ begin ], value, end - begin ); + Algorithms::MemoryOperations< Device >::set( &getData()[ begin ], value, end - begin ); } template< typename Value, @@ -328,7 +317,7 @@ evaluate( const Function& f, const Index begin, Index end ) if( end == 0 ) end = this->getSize(); - ParallelFor< DeviceType >::exec( begin, end, eval ); + Algorithms::ParallelFor< DeviceType >::exec( begin, end, eval ); } template< typename Value, @@ -342,7 +331,7 @@ containsValue( Value value, { if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value ); + return Algorithms::MemoryOperations< Device >::containsValue( &this->getData()[ begin ], end - begin, value ); } template< typename Value, @@ -356,7 +345,7 @@ containsOnlyValue( Value value, { if( end == 0 ) end = this->getSize(); - return Algorithms::ArrayOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value ); + return Algorithms::MemoryOperations< Device >::containsOnlyValue( &this->getData()[ begin ], end - begin, value ); } template< typename Value, typename Device, typename Index > @@ -395,7 +384,7 @@ load( const String& fileName ) template< typename Value, typename Device, typename Index > File& operator<<( File& file, const ArrayView< Value, Device, Index > view ) { - using IO = Algorithms::ArrayIO< Value, Device, Index >; + using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; saveObjectType( file, IO::getSerializationType() ); const Index size = view.getSize(); file.save( &size ); @@ -414,7 +403,7 @@ File& operator<<( File&& file, const ArrayView< Value, Device, Index > view ) template< typename Value, typename Device, typename Index > File& operator>>( File& file, ArrayView< Value, Device, Index > view ) { - using IO = Algorithms::ArrayIO< Value, Device, Index 
>; + using IO = detail::ArrayIO< Value, Index, typename Allocators::Default< Device >::template Allocator< Value > >; const String type = getObjectType( file ); if( type != IO::getSerializationType() ) throw Exceptions::FileDeserializationError( file.getFileName(), "object type does not match (expected " + IO::getSerializationType() + ", found " + type + ")." ); diff --git a/src/TNL/Containers/DistributedArray.h b/src/TNL/Containers/DistributedArray.h index 7f53c724a337ba9052520c250fbafa12c613c5f6..ce4e9ce5e15c6c110c117e984dc5a2e8ee26da67 100644 --- a/src/TNL/Containers/DistributedArray.h +++ b/src/TNL/Containers/DistributedArray.h @@ -35,11 +35,19 @@ public: using LocalRangeType = Subrange< Index >; using LocalViewType = Containers::ArrayView< Value, Device, Index >; using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >; - using HostType = DistributedArray< Value, Devices::Host, Index, Communicator >; - using CudaType = DistributedArray< Value, Devices::Cuda, Index, Communicator >; using ViewType = DistributedArrayView< Value, Device, Index, Communicator >; using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref DistributedArray type with changed template parameters. + */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedArray< _Value, _Device, _Index, _Communicator >; + + DistributedArray() = default; DistributedArray( DistributedArray& ) = default; @@ -83,13 +91,6 @@ public: void copyFromGlobal( ConstLocalViewType globalArray ); - static String getType(); - - virtual String getTypeVirtual() const; - - // TODO: no getSerializationType method until there is support for serialization - - // Usual Array methods follow below. 
/** diff --git a/src/TNL/Containers/DistributedArray.hpp b/src/TNL/Containers/DistributedArray.hpp index b8c65552785ba857563436d89ba6611a0e777b6f..c146bbf9f8657e6af5f38a8506d9c944a539c57a 100644 --- a/src/TNL/Containers/DistributedArray.hpp +++ b/src/TNL/Containers/DistributedArray.hpp @@ -14,7 +14,7 @@ #include "DistributedArray.h" -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Communicators/MpiDefs.h> // important only when MPI is disabled namespace TNL { @@ -110,7 +110,7 @@ copyFromGlobal( ConstLocalViewType globalArray ) localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); } @@ -160,33 +160,6 @@ operator ConstViewType() const return getConstView(); } -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArray< Value, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedArray< " ) + - TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - "<Communicator> >"; -} - -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArray< Value, Device, Index, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - template< typename Value, typename Device, typename Index, diff --git a/src/TNL/Containers/DistributedArrayView.h b/src/TNL/Containers/DistributedArrayView.h index 82a662e39cebecc50735444888ba4c065a5a4287..6022521bc66bc41df4d144a5651a934a52c5e158 100644 --- a/src/TNL/Containers/DistributedArrayView.h +++ b/src/TNL/Containers/DistributedArrayView.h @@ -34,11 +34,19 @@ public: using LocalRangeType = Subrange< Index >; using LocalViewType = Containers::ArrayView< Value, Device, Index >; 
using ConstLocalViewType = Containers::ArrayView< std::add_const_t< Value >, Device, Index >; - using HostType = DistributedArrayView< Value, Devices::Host, Index, Communicator >; - using CudaType = DistributedArrayView< Value, Devices::Cuda, Index, Communicator >; using ViewType = DistributedArrayView< Value, Device, Index, Communicator >; using ConstViewType = DistributedArrayView< std::add_const_t< Value >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref DistributedArrayView type with changed template parameters. + */ + template< typename _Value, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedArrayView< _Value, _Device, _Index, _Communicator >; + + // Initialization by raw data __cuda_callable__ DistributedArrayView( const LocalRangeType& localRange, IndexType globalSize, CommunicationGroup group, LocalViewType localData ) @@ -108,9 +116,6 @@ public: void copyFromGlobal( ConstLocalViewType globalArray ); - static String getType(); - - /* * Usual ArrayView methods follow below. 
*/ diff --git a/src/TNL/Containers/DistributedArrayView.hpp b/src/TNL/Containers/DistributedArrayView.hpp index 5cb9c10ed0e5ce4155f9b48f20226baa9106e1a1..0199229d48cab585b78d6618437d9fbcf275092a 100644 --- a/src/TNL/Containers/DistributedArrayView.hpp +++ b/src/TNL/Containers/DistributedArrayView.hpp @@ -180,24 +180,7 @@ copyFromGlobal( ConstLocalViewType globalArray ) localView[ i ] = globalArray[ localRange.getGlobalIndex( i ) ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); -} - - -template< typename Value, - typename Device, - typename Index, - typename Communicator > -String -DistributedArrayView< Value, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedArrayView< " ) + - TNL::getType< Value >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - "<Communicator> >"; + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localRange.getSize(), kernel ); } diff --git a/src/TNL/Containers/DistributedNDArray.h b/src/TNL/Containers/DistributedNDArray.h index 4b123d114faa37e9022d7b5caab6f9c7124c2263..57b94a34b1bd7c210d24462aa1859cc68f087f15 100644 --- a/src/TNL/Containers/DistributedNDArray.h +++ b/src/TNL/Containers/DistributedNDArray.h @@ -392,7 +392,7 @@ public: void allocate() { SizesHolderType localSizes; - TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds ); + Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolderType::getDimension(), LocalSizesSetter >::execHost( localSizes, globalSizes, localBegins, localEnds ); localArray.setSize( localSizes ); } diff --git a/src/TNL/Containers/DistributedNDArraySynchronizer.h b/src/TNL/Containers/DistributedNDArraySynchronizer.h index e6e41ba3338010779a1b110af90e198cdae617aa..6985303785f78e931303d2bddfa51407f4cc7ebc 100644 --- 
a/src/TNL/Containers/DistributedNDArraySynchronizer.h +++ b/src/TNL/Containers/DistributedNDArraySynchronizer.h @@ -51,7 +51,7 @@ public: array_view.bind( array.getView() ); // allocate buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), AllocateHelper >::execHost( buffers, array_view ); } else { // only bind to the actual data @@ -80,18 +80,18 @@ protected: #endif // fill send buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, true ); // issue all send and receive async operations std::vector< typename Communicator::Request > requests; const typename Communicator::CommunicationGroup group = array_view.getCommunicationGroup(); - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), SendHelper >::execHost( buffers, requests, group ); // wait until send is done Communicator::WaitAll( requests.data(), requests.size() ); // copy data from receive buffers - TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); + Algorithms::TemplateStaticFor< std::size_t, 0, DistributedNDArray::getDimension(), CopyHelper >::execHost( buffers, array_view, false ); } template< std::size_t dim > diff --git a/src/TNL/Containers/DistributedVector.h b/src/TNL/Containers/DistributedVector.h index 51d7c537c534d3b5a82178ccc2096f9cea1601f2..db4e46e68616244399e85fa7903ebe53a1e30585 100644 --- a/src/TNL/Containers/DistributedVector.h +++ b/src/TNL/Containers/DistributedVector.h @@ -34,11 +34,19 @@ public: 
using IndexType = Index; using LocalViewType = Containers::VectorView< Real, Device, Index >; using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >; - using HostType = DistributedVector< Real, Devices::Host, Index, Communicator >; - using CudaType = DistributedVector< Real, Devices::Cuda, Index, Communicator >; using ViewType = DistributedVectorView< Real, Device, Index, Communicator >; using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref Vector type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedVector< _Real, _Device, _Index, _Communicator >; + + // inherit all constructors and assignment operators from Array using BaseType::DistributedArray; using BaseType::operator=; @@ -69,11 +77,6 @@ public: operator ConstViewType() const; - static String getType(); - - virtual String getTypeVirtual() const; - - /* * Usual Vector methods follow below. 
*/ @@ -128,7 +131,7 @@ public: DistributedVector& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVector.hpp b/src/TNL/Containers/DistributedVector.hpp index dbf8b10b8e2fe7714d16d7dcf399564abd01044d..fa49591e8ae53ffd06214772491c656b91601413 100644 --- a/src/TNL/Containers/DistributedVector.hpp +++ b/src/TNL/Containers/DistributedVector.hpp @@ -13,7 +13,7 @@ #pragma once #include "DistributedVector.h" -#include <TNL/Containers/Algorithms/DistributedScan.h> +#include <TNL/Algorithms/DistributedScan.h> namespace TNL { namespace Containers { @@ -83,34 +83,6 @@ operator ConstViewType() const } -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVector< Real, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedVector< " ) + - TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - "<Communicator> >"; -} - -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVector< Real, Device, Index, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - - /* * Usual Vector methods follow below. 
*/ @@ -301,7 +273,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVector< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/DistributedVectorView.h b/src/TNL/Containers/DistributedVectorView.h index 99764432de6488aa557f4922e4e3ad1c3dbcfe7e..70452c50d17b8a08004a3ad162005154eb228ba8 100644 --- a/src/TNL/Containers/DistributedVectorView.h +++ b/src/TNL/Containers/DistributedVectorView.h @@ -35,11 +35,19 @@ public: using IndexType = Index; using LocalViewType = Containers::VectorView< Real, Device, Index >; using ConstLocalViewType = Containers::VectorView< std::add_const_t< Real >, Device, Index >; - using HostType = DistributedVectorView< Real, Devices::Host, Index, Communicator >; - using CudaType = DistributedVectorView< Real, Devices::Cuda, Index, Communicator >; using ViewType = DistributedVectorView< Real, Device, Index, Communicator >; using ConstViewType = DistributedVectorView< std::add_const_t< Real >, Device, Index, Communicator >; + /** + * \brief A template which allows to quickly obtain a \ref VectorView type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Communicator = Communicator > + using Self = DistributedVectorView< _Real, _Device, _Index, _Communicator >; + + // inherit all constructors and assignment operators from ArrayView using BaseType::DistributedArrayView; using BaseType::operator=; @@ -72,8 +80,6 @@ public: __cuda_callable__ ConstViewType getConstView() const; - static String getType(); - /* * Usual Vector methods follow below. 
*/ @@ -128,7 +134,7 @@ public: DistributedVectorView& operator/=( const Vector& vector ); template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/DistributedVectorView.hpp b/src/TNL/Containers/DistributedVectorView.hpp index 6a934d8c25d2a786257785787f55adcafba79804..70f61979fd44fb8d3f9d1878eb2c4a6ecd5c169b 100644 --- a/src/TNL/Containers/DistributedVectorView.hpp +++ b/src/TNL/Containers/DistributedVectorView.hpp @@ -13,7 +13,7 @@ #pragma once #include "DistributedVectorView.h" -#include <TNL/Containers/Algorithms/DistributedScan.h> +#include <TNL/Algorithms/DistributedScan.h> namespace TNL { namespace Containers { @@ -64,22 +64,6 @@ getConstView() const return *this; } -template< typename Real, - typename Device, - typename Index, - typename Communicator > -String -DistributedVectorView< Real, Device, Index, Communicator >:: -getType() -{ - return String( "Containers::DistributedVectorView< " ) + - TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + ", " + - // TODO: communicators don't have a getType method - "<Communicator> >"; -} - /* * Usual Vector methods follow below. 
@@ -277,7 +261,7 @@ template< typename Real, template< Algorithms::ScanType Type > void DistributedVectorView< Real, Device, Index, Communicator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); diff --git a/src/TNL/Containers/Expressions/Comparison.h b/src/TNL/Containers/Expressions/Comparison.h index 616ad5807864a1b1f2cd7b5af765132021c99a24..98e39ad8c561240f2ac0522844dd362059ebf710 100644 --- a/src/TNL/Containers/Expressions/Comparison.h +++ b/src/TNL/Containers/Expressions/Comparison.h @@ -14,8 +14,8 @@ #include <TNL/Assert.h> #include <TNL/Containers/Expressions/ExpressionVariableType.h> -#include <TNL/Containers/Algorithms/Reduction.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Algorithms/Reduction.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> namespace TNL { namespace Containers { @@ -45,7 +45,7 @@ struct VectorComparison< T1, T2, true > return false; if( a.getSize() == 0 ) return true; - return Algorithms::ArrayOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() ); + return Algorithms::MultiDeviceMemoryOperations< typename T1::DeviceType, typename T2::DeviceType >::compare( a.getData(), b.getData(), a.getSize() ); } }; diff --git a/src/TNL/Containers/Expressions/DistributedComparison.h b/src/TNL/Containers/Expressions/DistributedComparison.h index 7a7d5c5bebf24fca09a2f1837561483d27f8f9b8..b5e0e96a9d756f55edb7bf15652ad1d0d34bc5d6 100644 --- a/src/TNL/Containers/Expressions/DistributedComparison.h +++ b/src/TNL/Containers/Expressions/DistributedComparison.h @@ -11,7 +11,6 @@ #pragma once #include <TNL/Containers/Expressions/ExpressionVariableType.h> -#include <TNL/Containers/Algorithms/Reduction.h> #include <TNL/Communicators/MpiDefs.h> namespace TNL { diff --git a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h 
b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h index fe8997aac8a244dfbafd14c3b705fc31abb6b6f7..355689039884e8e05caff040e232ffbcf26119ba 100644 --- a/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/DistributedExpressionTemplates.h @@ -2207,7 +2207,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2226,7 +2226,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2252,7 +2252,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2275,7 +2275,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2301,7 +2301,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero 
); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2324,7 +2324,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/ExpressionTemplates.h b/src/TNL/Containers/Expressions/ExpressionTemplates.h index 763bdbfd1e85545c5ddf420fc6725066c245b7de..a0980baf6cf8ee751444df7d27205dfa9260f593 100644 --- a/src/TNL/Containers/Expressions/ExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/ExpressionTemplates.h @@ -2130,7 +2130,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2149,7 +2149,7 @@ Result evaluateAndReduce( Vector& lhs, RealType* lhs_data = lhs.getData(); auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return ( lhs_data[ i ] = expression[ i ] ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2175,7 +2175,7 @@ Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2198,7 +2198,7 @@ 
Result addAndReduce( Vector& lhs, lhs_data[ i ] += aux; return aux; }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } //// @@ -2224,7 +2224,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } template< typename Vector, @@ -2247,7 +2247,7 @@ Result addAndReduceAbs( Vector& lhs, lhs_data[ i ] += aux; return TNL::abs( aux ); }; - return Containers::Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); + return Algorithms::Reduction< DeviceType >::reduce( lhs.getSize(), reduction, fetch, zero ); } } // namespace TNL diff --git a/src/TNL/Containers/Expressions/VerticalOperations.h b/src/TNL/Containers/Expressions/VerticalOperations.h index 29e904bbfafb5c338a523227bb9e226d4fda9970..84d362e8aab01b704cdefac44ae0d7e0e6a7150d 100644 --- a/src/TNL/Containers/Expressions/VerticalOperations.h +++ b/src/TNL/Containers/Expressions/VerticalOperations.h @@ -13,7 +13,7 @@ #include <limits> #include <type_traits> -#include <TNL/Containers/Algorithms/Reduction.h> +#include <TNL/Algorithms/Reduction.h> //// // By vertical operations we mean those applied across vector elements or diff --git a/src/TNL/Containers/List.h b/src/TNL/Containers/List.h deleted file mode 100644 index 0cf6f762dbfce6057af4132659064fc889c91082..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/List.h +++ /dev/null @@ -1,229 +0,0 @@ -/*************************************************************************** - List.h - description - ------------------- - begin : Sat, 10 Apr 2004 15:58:51 +0100 - copyright : (C) 2004 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz 
- ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <iostream> - -#include <TNL/Assert.h> -#include <TNL/File.h> -#include <TNL/String.h> -#include <TNL/param-types.h> - -namespace TNL { -namespace Containers { - -template< class T > class ListDataElement; - -/// \brief Template for double linked lists -/*! To acces elements in the list one can use method getSize() and - operator[](). To add elements there are methods Append(), - Prepend() and Insert() to insert an element at given - position. To erase particular element there is method - Erase() taking the element position. To erase all elements - there is method reset(). There are also alternatives DeepErase() - and DeepEraseAll() to free dynamicaly allocated data inside the - data elements. - The list stores pointer to last accesed element so if one goes - seqeuntialy through the list there is no inefficiency. The - accesing algorithm is also able to deside whether to start from - the last accesed position or from the begining resp. from the end - of the list. So with common use one does not need to worry about - efficiency :-) - */ -template< class T > class List -{ - public: - typedef T ValueType; - - /// \brief Basic constructor. - /// - /// Constructs an empty list. - List(); - - /// \brief Copy constructor. - /// - /// Construct a copy of \e list. - /// \param list Name of another list. - List( const List& list ); - - /// \brief Destructor. - /// - /// Destroys the list. References to the values in the list become invalid. - ~List(); - - /// Returns the type of list. - static String getType(); - - /// Returns \e true if the list contains no items, otherwise returns \e false. - bool isEmpty() const; - - /// Returns number of items in the list. - int getSize() const; - - /// Indexing operator. - T& operator[] ( const int& ind ); - - /// Indexing operator for constant instances. 
- const T& operator[] ( const int& ind ) const; - - const List& operator = ( const List& lst ); - - bool operator == ( const List& lst ) const; - - bool operator != ( const List& lst ) const; - - /// \brief Appends new data element. - /// - /// Inserts \e data at the end of the list. - bool Append( const T& data ); - - /// \brief Prepends new data element. - /// - /// Inserts \e data at the beginning of the list. - bool Prepend( const T& data ); - - /// \brief Inserts new data element at given position. - /// - /// Inserts \e data at index position \e ind in the list. - bool Insert( const T& data, const int& ind ); - - /// Appends copy of another list. - /// - /// \param lst Name of another list. - bool AppendList( const List< T >& lst ); - - /// Prepends copy of another list. - /// - /// \param lst Name of another list. - bool PrependList( const List< T >& lst ); - - /// Transforms list to an \e array. - template< typename Array > - void toArray( Array& array ); - - /*** - * \brief Checks if there is an element with value \e v in given array. - * - * \param v Reference to a value. - */ - bool containsValue( const T& v ) const; - - /// Erases data element at given position. - /// - /// \param ind Index of the data element one chooses to remove. - void Erase( const int& ind ); - - /// Erases data element with contained data at given position. - /// - /// \param ind Index of the data element one chooses to remove. - void DeepErase( const int& ind ); - - /// Erases all data elements. - void reset(); - - /// \brief Erases all data elements with contained data. - /// - /// Frees dynamicaly allocated data inside the data elements - void DeepEraseAll(); - - /// Saves the list in binary format. - /// - /// \param file Name of file. - bool Save( File& file ) const; - - /// Saves the list in binary format using method save of type T. - /// - /// \param file Name of file. - bool DeepSave( File& file ) const; - - /// Loads the list from file. 
- /// - /// \param file Name of file. - bool Load( File& file ); - - /// Loads the list from file using method Load of the type T. - /// - /// \param file Name of file. - bool DeepLoad( File& file ); - - protected: - /// Pointer to the first element. - ListDataElement< T >* first; - - /// Pointer to the last element. - /*! We use pointer to last element while adding new element to keep order of elements - */ - ListDataElement< T >* last; - - /// List size. - int size; - - /// Iterator. - mutable ListDataElement< T >* iterator; - - /// Iterator index. - mutable int index; -}; - -template< typename T > std::ostream& operator << ( std::ostream& str, const List< T >& list ); - -//! Data element for List and mStack -template< class T > class ListDataElement -{ - //! Main data - T data; - - //! Pointer to the next element - ListDataElement< T >* next; - - //! Pointer to the previous element - ListDataElement< T >* previous; - - public: - //! Basic constructor - ListDataElement() - : next( 0 ), - previous( 0 ){}; - - //! Constructor with given data and possibly pointer to next element - ListDataElement( const T& dt, - ListDataElement< T >* prv = 0, - ListDataElement< T >* nxt = 0 ) - : data( dt ), - next( nxt ), - previous( prv ){}; - - //! Destructor - ~ListDataElement(){}; - - //! Return data for non-const instances - T& Data() { return data; }; - - //! Return data for const instances - const T& Data() const { return data; }; - - //! Return pointer to the next element for non-const instances - ListDataElement< T >*& Next() { return next; }; - - //! Return pointer to the next element for const instances - const ListDataElement< T >* Next() const { return next; }; - - //! Return pointer to the previous element for non-const instances - ListDataElement< T >*& Previous() { return previous; }; - - //! 
Return pointer to the previous element for const instances - const ListDataElement< T >* Previous() const { return previous; }; -}; - -} // namespace Containers -} // namespace TNL - -#include <TNL/Containers/List_impl.h> diff --git a/src/TNL/Containers/List_impl.h b/src/TNL/Containers/List_impl.h deleted file mode 100644 index a8bcb81158ad187b3a3573e2a4c34c758f64640f..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/List_impl.h +++ /dev/null @@ -1,352 +0,0 @@ -/*************************************************************************** - List_impl.h - description - ------------------- - begin : Mar, 5 Apr 2016 12:46 PM - copyright : (C) 2016 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Containers/List.h> -#include <TNL/Math.h> - -namespace TNL { -namespace Containers { - -template< typename T > -List< T >::List() - : first( 0 ), last( 0 ), size( 0 ), iterator( 0 ), index( 0 ) -{ -} - -template< typename T > -List< T >::List( const List& list ) - : first( 0 ), last( 0 ), size( 0 ), iterator( 0 ), index( 0 ) -{ - AppendList( list ); -} - -template< typename T > -List< T >::~List() -{ - reset(); -} - -template< typename T > -String List< T >::getType() -{ - return String( "Containers::List< " ) + TNL::getType< T >() + String( " >" ); -} - -template< typename T > -bool List< T >::isEmpty() const -{ - return ! size; -} - -template< typename T > -int List< T >::getSize() const -{ - return size; -} - -template< typename T > -T& List< T >::operator[]( const int& ind ) -{ - TNL_ASSERT( ind < size, ); - int iter_dist = TNL::abs( index - ind ); - if( ! iterator || - iter_dist > ind || - iter_dist > size - ind ) - { - if( ind < size - ind ) - { - //cout << "Setting curent index to 0." 
<< std::endl; - index = 0; - iterator = first; - } - else - { - //cout << "Setting curent index to size - 1." << std::endl; - index = size - 1; - iterator = last; - } - } - while( index != ind ) - { - //cout << " current index = " << index - // << " index = " << ind << std::endl; - if( ind < index ) - { - iterator = iterator -> Previous(); - index --; - } - else - { - iterator = iterator -> Next(); - index ++; - } - TNL_ASSERT( iterator, ); - } - return iterator -> Data(); -}; - -template< typename T > -const T& List< T >::operator[]( const int& ind ) const -{ - return const_cast< List< T >* >( this ) -> operator[]( ind ); -} - -template< typename T > -const List< T >& List< T >::operator = ( const List& lst ) -{ - AppendList( lst ); - return( *this ); -} - -template< typename T > -bool List< T >::operator == ( const List& lst ) const -{ - if( this->getSize() != lst.getSize() ) - return false; - for( int i = 0; i < this->getSize(); i++ ) - if( (*this)[ i ] != lst[ i ] ) - return false; - return true; -} - -template< typename T > -bool List< T >::operator != ( const List& lst ) const -{ - return ! operator==( lst ); -} - -template< typename T > -bool List< T >::Append( const T& data ) -{ - if( ! first ) - { - TNL_ASSERT( ! last, ); - first = last = new ListDataElement< T >( data ); - } - else - { - ListDataElement< T >* new_element = new ListDataElement< T >( data, last, 0 ); - TNL_ASSERT( last, ); - last = last -> Next() = new_element; - } - size ++; - return true; -}; - -template< typename T > -bool List< T >::Prepend( const T& data ) -{ - if( ! first ) - { - TNL_ASSERT( ! last, ); - first = last = new ListDataElement< T >( data ); - } - else - { - ListDataElement< T >* new_element = new ListDataElement< T >( data, 0, first ); - first = first -> Previous() = new_element; - } - size ++; - index ++; - return true; -}; - -template< typename T > -bool List< T >::Insert( const T& data, const int& ind ) -{ - TNL_ASSERT( ind <= size || ! 
size, ); - if( ind == 0 ) return Prepend( data ); - if( ind == size ) return Append( data ); - operator[]( ind ); - ListDataElement< T >* new_el = - new ListDataElement< T >( data, - iterator -> Previous(), - iterator ); - iterator -> Previous() -> Next() = new_el; - iterator -> Previous() = new_el; - iterator = new_el; - size ++; - return true; -}; - -template< typename T > -bool List< T >::AppendList( const List< T >& lst ) -{ - int i; - for( i = 0; i < lst. getSize(); i ++ ) - { - if( ! Append( lst[ i ] ) ) return false; - } - return true; -}; - -template< typename T > -bool List< T >::PrependList( const List< T >& lst ) - -{ - int i; - for( i = lst. getSize(); i > 0; i -- ) - if( ! Prepend( lst[ i - 1 ] ) ) return false; - return true; -}; - -template< typename T > - template< typename Array > -void List< T >::toArray( Array& array ) -{ - array.setSize( this->getSize() ); - for( int i = 0; i < this->getSize(); i++ ) - array[ i ] = ( *this )[ i ]; -} -template< typename T > -bool List< T >::containsValue( const T& v ) const -{ - for( int i = 0; i < this->getSize(); i++ ) - if( ( *this )[ i ] == v ) - return true; - return false; -} - -template< typename T > -void List< T >::Erase( const int& ind ) -{ - operator[]( ind ); - ListDataElement< T >* tmp_it = iterator; - if( iterator -> Next() ) - iterator -> Next() -> Previous() = iterator -> Previous(); - if( iterator -> Previous() ) - iterator -> Previous() -> Next() = iterator -> Next(); - if( iterator -> Next() ) iterator = iterator -> Next(); - else - { - iterator = iterator -> Previous(); - index --; - } - if( first == tmp_it ) first = iterator; - if( last == tmp_it ) last = iterator; - delete tmp_it; - size --; -}; - -template< typename T > -void List< T >::DeepErase( const int& ind ) -{ - operator[]( ind ); - delete iterator -> Data(); - Erase( ind ); -}; - -template< typename T > -void List< T >::reset() -{ - iterator = first; - ListDataElement< T >* tmp_it; - while( iterator ) - { - TNL_ASSERT( iterator, ); 
- tmp_it = iterator; - iterator = iterator -> Next(); - delete tmp_it; - } - first = last = 0; - size = 0; -}; - -template< typename T > -void List< T >::DeepEraseAll() -{ - iterator = first; - ListDataElement< T >* tmp_it; - int i( 0 ); - while( iterator ) - { - tmp_it = iterator; - iterator = iterator -> Next(); - delete tmp_it -> Data(); - delete tmp_it; - i++; - } - first = last = 0; - size = 0; -}; - -template< typename T > -bool List< T >::Save( File& file ) const -{ - file.save( &size ); - for( int i = 0; i < size; i ++ ) - if( ! file. save( &operator[]( i ), 1 ) ) - return false; - return true; -} - -template< typename T > -bool List< T >::DeepSave( File& file ) const -{ - file.save( &size ); - for( int i = 0; i < size; i ++ ) - if( ! operator[]( i ). save( file ) ) return false; - return true; -} - -template< typename T > -bool List< T >::Load( File& file ) -{ - reset(); - int _size; - file.load( &_size, 1 ); - if( _size < 0 ) - { - std::cerr << "The curve size is negative." << std::endl; - return false; - } - T t; - for( int i = 0; i < _size; i ++ ) - { - if( ! file.load( &t, 1 ) ) - return false; - Append( t ); - } - return true; -}; - -template< typename T > -bool List< T >::DeepLoad( File& file ) -{ - reset(); - int _size; - file.load( &_size ); - if( _size < 0 ) - { - std::cerr << "The list size is negative." << std::endl; - return false; - } - for( int i = 0; i < _size; i ++ ) - { - T t; - if( ! t. load( file ) ) return false; - Append( t ); - } - return true; -}; - -template< typename T > -std::ostream& operator << ( std::ostream& str, const List< T >& list ) -{ - int i, size( list. 
getSize() ); - for( i = 0; i < size; i ++ ) - str << "Item " << i << ":" << list[ i ] << std::endl; - return str; -}; - -} // namespace Containers -} // namespace TNL diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h index 76e61846afebe3e5dab4706556550e95db667db3..5e575cc21ce8292ba3f9d3d4c8ed4b189b056936 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.h @@ -52,10 +52,6 @@ class EllpackIndexMultimap template< typename Device_ > EllpackIndexMultimap& operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >& other ); - static String getType(); - - String getTypeVirtual() const; - void setKeysRange( const IndexType& keysRange ); __cuda_callable__ diff --git a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp index 8aaba006ac3ce7e9a71e333a8185031ff8de8c82..6fb1f4b26d5a4c3b8447e1156c89641118be3c32 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimap.hpp @@ -43,34 +43,6 @@ operator=( const EllpackIndexMultimap< Index, Device_, LocalIndex, SliceSize >& return *this; } -template< typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >:: -getType() -{ - return String( "EllpackIndexMultimap< ") + - String( TNL::getType< Index >() ) + - String( ", " ) + - Device :: getDeviceType() + - String( ", " ) + - String( TNL::getType< LocalIndexType >() ) + - String( " >" ); -} - -template< typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -EllpackIndexMultimap< Index, Device, LocalIndex, SliceSize >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename Index, typename Device, typename LocalIndex, diff --git 
a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h index fe7a0fb380230909be094042f69cf3ddabd24522..9be47980d1dbef78af8891ff50837d70fb851c22 100644 --- a/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h +++ b/src/TNL/Containers/Multimaps/EllpackIndexMultimapValues.h @@ -13,7 +13,7 @@ #include <type_traits> #include <ostream> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h index 2acd3c5d480aeb479fed6c2ab781e1d3c9cf68d2..9533393059255bc7151e803e18cec2f1829ea4b7 100644 --- a/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h +++ b/src/TNL/Containers/Multimaps/MultimapPermutationApplier.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Pointers/DevicePointer.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Containers { @@ -48,11 +48,11 @@ void permuteMultimapKeys( Multimap& multimap, const PermutationVector& perm ) Pointers::DevicePointer< Multimap > multimapPointer( multimap ); Pointers::DevicePointer< Multimap > multimapCopyPointer( multimapCopy ); - ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), - kernel, - &multimapPointer.template getData< DeviceType >(), - &multimapCopyPointer.template modifyData< DeviceType >(), - perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), + kernel, + &multimapPointer.template getData< DeviceType >(), + &multimapCopyPointer.template modifyData< DeviceType >(), + perm.getData() ); // copy the permuted data back into the multimap multimap = multimapCopy; @@ -79,10 +79,10 @@ void permuteMultimapValues( Multimap& multimap, const PermutationVector& iperm ) }; Pointers::DevicePointer< Multimap > multimapPointer( multimap ); - ParallelFor< DeviceType 
>::exec( (IndexType) 0, multimap.getKeysRange(), - kernel, - &multimapPointer.template modifyData< DeviceType >(), - iperm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, multimap.getKeysRange(), + kernel, + &multimapPointer.template modifyData< DeviceType >(), + iperm.getData() ); } } // namespace Multimaps diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h index ba9994da9bb933fa7da825b550cca73bdc2e7498..f816cabd2c37626b978a03dbdf6a6ed63076036d 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.h @@ -52,10 +52,6 @@ class StaticEllpackIndexMultimap template< typename Device_ > StaticEllpackIndexMultimap& operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalIndex, SliceSize >& other ); - static String getType(); - - String getTypeVirtual() const; - void setKeysRange( const IndexType& keysRange ); __cuda_callable__ diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp index c8dcd637eeb05fe3d763606a40c16c2e7dd6833e..3e03f9e03a5c22cad10e242b93dff20151a37598 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimap.hpp @@ -43,36 +43,6 @@ operator=( const StaticEllpackIndexMultimap< ValuesCount, Index, Device_, LocalI return *this; } -template< int ValuesCount, - typename Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >:: -getType() -{ - return String( "StaticEllpackIndexMultimap< ") + - String( TNL::getType< Index >() ) + - String( ", " ) + - Device :: getDeviceType() + - String( ", " ) + - String( TNL::getType< LocalIndexType >() ) + - String( " >" ); -} - -template< int ValuesCount, - typename 
Index, - typename Device, - typename LocalIndex, - int SliceSize > -String -StaticEllpackIndexMultimap< ValuesCount, Index, Device, LocalIndex, SliceSize >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< int ValuesCount, typename Index, typename Device, diff --git a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h index 95ffade9fcad3674a7092bc69701e2a2500ab819..efae4f05173b9f0531cc12e96dd5644a5c72fefe 100644 --- a/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h +++ b/src/TNL/Containers/Multimaps/StaticEllpackIndexMultimapValues.h @@ -13,7 +13,7 @@ #include <type_traits> #include <ostream> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { namespace Containers { diff --git a/src/TNL/Containers/NDArray.h b/src/TNL/Containers/NDArray.h index 8472f4d7151b9896a20c3f20af5d302286969022..3cbc8a7bc1c484cecbb847db248525a42b756ae5 100644 --- a/src/TNL/Containers/NDArray.h +++ b/src/TNL/Containers/NDArray.h @@ -352,13 +352,13 @@ class StaticNDArray SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >, - void > + Devices::Sequential > { using Base = NDArrayStorage< StaticArray< __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get(), Value >, SizesHolder, Permutation, __ndarray_impl::NDArrayBase< SliceInfo< 0, 0 > >, - void >; + Devices::Sequential >; static_assert( __ndarray_impl::StaticStorageSizeGetter< SizesHolder >::get() > 0, "All dimensions of a static array must to be positive." 
); diff --git a/src/TNL/Containers/NDArrayView.h b/src/TNL/Containers/NDArrayView.h index 3e37de372521cddb69db001ec05f6b238d644e15..d5d94d61eeee144491cd2d359a0bea16825e9b55 100644 --- a/src/TNL/Containers/NDArrayView.h +++ b/src/TNL/Containers/NDArrayView.h @@ -18,7 +18,8 @@ #include <TNL/Containers/ndarray/Executors.h> #include <TNL/Containers/ndarray/BoundaryExecutors.h> #include <TNL/Containers/ndarray/Operations.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> namespace TNL { namespace Containers { @@ -75,7 +76,7 @@ public: { TNL_ASSERT_EQ( getSizes(), other.getSizes(), "The sizes of the array views must be equal, views are not resizable." ); if( getStorageSize() > 0 ) - Algorithms::ArrayOperations< DeviceType >::copy( array, other.array, getStorageSize() ); + Algorithms::MemoryOperations< DeviceType >::copy( array, other.array, getStorageSize() ); return *this; } @@ -93,7 +94,7 @@ public: "The sizes of the array views must be equal, views are not resizable." ); if( getStorageSize() > 0 ) { TNL_ASSERT_TRUE( array, "Attempted to assign to an empty view." 
); - Algorithms::ArrayOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() ); + Algorithms::MultiDeviceMemoryOperations< DeviceType, typename OtherView::DeviceType >::copy( array, other.getData(), getStorageSize() ); } return *this; } @@ -138,7 +139,7 @@ public: if( getSizes() != other.getSizes() ) return false; // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray - return Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() ); + return Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() ); } TNL_NVCC_HD_WARNING_DISABLE @@ -148,7 +149,7 @@ public: if( getSizes() != other.getSizes() ) return true; // FIXME: uninitialized data due to alignment in NDArray and padding in SlicedNDArray - return ! Algorithms::ArrayOperations< Device, Device >::compare( array, other.array, getStorageSize() ); + return ! Algorithms::MemoryOperations< Device >::compare( array, other.array, getStorageSize() ); } __cuda_callable__ diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 2421305a7df26e1949a70d905092ddf6ab26edaa..51ee055066fab43d3eaca7a53e5a1bc1bee2abb0 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -84,6 +84,7 @@ public: * * @param elems input initializer list */ + __cuda_callable__ StaticArray( const std::initializer_list< Value > &elems ); /** @@ -105,10 +106,6 @@ public: __cuda_callable__ StaticArray( const Value& v1, const Value& v2, const Value& v3 ); - /** - * \brief Gets type of this array. - */ - static String getType(); /** * \brief Gets pointer to data of this static array. 
diff --git a/src/TNL/Containers/StaticArray.hpp b/src/TNL/Containers/StaticArray.hpp index 89a66ecc91af572edd3307be9993e2dc330dddc5..c1ac8e62a7a2e8257d0824c36d7f4caaa443e661 100644 --- a/src/TNL/Containers/StaticArray.hpp +++ b/src/TNL/Containers/StaticArray.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - StaticArray_impl.h - description + StaticArray.hpp - description ------------------- begin : Feb 10, 2014 copyright : (C) 2014 by Tomas Oberhuber @@ -10,11 +10,11 @@ #pragma once -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> #include <TNL/Math.h> #include <TNL/Containers/StaticArray.h> -#include <TNL/Containers/Algorithms/StaticArrayAssignment.h> -#include <TNL/StaticFor.h> +#include <TNL/Containers/detail/StaticArrayAssignment.h> +#include <TNL/Algorithms/StaticFor.h> namespace TNL { namespace Containers { @@ -102,24 +102,25 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const Value v[ Size ] ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v ); } template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const Value& v ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, v ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, v ); } template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >::StaticArray( const StaticArray< Size, Value >& v ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, v.getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, v.getData() ); } template< int Size, typename Value > +__cuda_callable__ StaticArray< Size, Value >::StaticArray( const std::initializer_list< Value > &elems) { auto it = elems.begin(); @@ -146,16 +147,6 @@ 
StaticArray< Size, Value >::StaticArray( const Value& v1, const Value& v2, const data[ 2 ] = v3; } -template< int Size, typename Value > -String StaticArray< Size, Value >::getType() -{ - return String( "Containers::StaticArray< " ) + - convertToString( Size ) + - String( ", " ) + - TNL::getType< Value >() + - String( " >" ); -} - template< int Size, typename Value > __cuda_callable__ Value* StaticArray< Size, Value >::getData() @@ -237,7 +228,7 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const StaticArray< Size, Value >& array ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, data, array.getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, data, array.getData() ); return *this; } @@ -246,7 +237,7 @@ template< int Size, typename Value > __cuda_callable__ StaticArray< Size, Value >& StaticArray< Size, Value >::operator=( const T& v ) { - Algorithms::StaticArrayAssignment< StaticArray, T >::assign( *this, v ); + detail::StaticArrayAssignment< StaticArray, T >::assign( *this, v ); return *this; } @@ -273,7 +264,7 @@ StaticArray< Size, Value >:: operator StaticArray< Size, OtherValue >() const { StaticArray< Size, OtherValue > aux; - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), data ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), data ); return aux; } @@ -281,20 +272,20 @@ template< int Size, typename Value > __cuda_callable__ void StaticArray< Size, Value >::setValue( const ValueType& val ) { - StaticFor< 0, Size >::exec( Algorithms::detail::AssignValueFunctor{}, data, val ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignValueFunctor{}, data, val ); } template< int Size, typename Value > bool StaticArray< Size, Value >::save( File& file ) const { - file.save< Value, Value, Devices::Host >( data, Size ); + file.save( data, Size ); return 
true; } template< int Size, typename Value > bool StaticArray< Size, Value >::load( File& file) { - file.load< Value, Value, Devices::Host >( data, Size ); + file.load( data, Size ); return true; } diff --git a/src/TNL/Containers/StaticVector.h b/src/TNL/Containers/StaticVector.h index a15420d07bb9edf1940329eb4597c7f4e81726de..2fe136ac951d4c02bb339e4c0edcee43f3c3b7d8 100644 --- a/src/TNL/Containers/StaticVector.h +++ b/src/TNL/Containers/StaticVector.h @@ -53,11 +53,13 @@ public: /** * \brief Default copy-assignment operator. */ + __cuda_callable__ StaticVector& operator=( const StaticVector& ) = default; /** * \brief Default move-assignment operator. */ + __cuda_callable__ StaticVector& operator=( StaticVector&& ) = default; //! Constructors and assignment operators are inherited from the class \ref StaticArray. @@ -94,11 +96,6 @@ public: bool setup( const Config::ParameterContainer& parameters, const String& prefix = "" ); - /** - * \brief Gets type of this vector. - */ - static String getType(); - /** * \brief Assignment operator with a vector expression. 
* @@ -108,6 +105,7 @@ public: * \return reference to this vector */ template< typename VectorExpression > + __cuda_callable__ StaticVector& operator=( const VectorExpression& expression ); /** @@ -185,6 +183,7 @@ namespace TNL { namespace Containers { template< typename Real > +__cuda_callable__ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, const StaticVector< 3, Real >& v ) { @@ -196,6 +195,7 @@ StaticVector< 3, Real > VectorProduct( const StaticVector< 3, Real >& u, } template< typename Real > +__cuda_callable__ Real TriangleArea( const StaticVector< 2, Real >& a, const StaticVector< 2, Real >& b, const StaticVector< 2, Real >& c ) @@ -213,6 +213,7 @@ Real TriangleArea( const StaticVector< 2, Real >& a, } template< typename Real > +__cuda_callable__ Real TriangleArea( const StaticVector< 3, Real >& a, const StaticVector< 3, Real >& b, const StaticVector< 3, Real >& c ) diff --git a/src/TNL/Containers/StaticVector.hpp b/src/TNL/Containers/StaticVector.hpp index 8442db66124c20f32d1409398e716c012ca2fe1b..dc97eeea99b5d551e11beb7543e5f0822f79fad4 100644 --- a/src/TNL/Containers/StaticVector.hpp +++ b/src/TNL/Containers/StaticVector.hpp @@ -11,7 +11,7 @@ #pragma once #include <TNL/Containers/StaticVector.h> -#include <TNL/Containers/Algorithms/VectorAssignment.h> +#include <TNL/Containers/detail/VectorAssignment.h> namespace TNL { namespace Containers { @@ -20,9 +20,10 @@ template< int Size, typename Real > template< typename T1, typename T2, template< typename, typename > class Operation > +__cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation >& expr ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr ); + detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticBinaryExpressionTemplate< T1, T2, Operation > >::assignStatic( *this, expr ); } 
template< int Size, @@ -32,7 +33,7 @@ template< int Size, __cuda_callable__ StaticVector< Size, Real >::StaticVector( const Expressions::StaticUnaryExpressionTemplate< T, Operation >& expr ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr ); + detail::VectorAssignment< StaticVector< Size, Real >, Expressions::StaticUnaryExpressionTemplate< T, Operation > >::assignStatic( *this, expr ); } template< int Size, typename Real > @@ -50,22 +51,13 @@ StaticVector< Size, Real >::setup( const Config::ParameterContainer& parameters, return true; } -template< int Size, typename Real > -String StaticVector< Size, Real >::getType() -{ - return String( "Containers::StaticVector< " ) + - convertToString( Size ) + - String( ", " ) + - TNL::getType< Real >() + - String( " >" ); -} - template< int Size, typename Real > template< typename VectorExpression > +__cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression ); + detail::VectorAssignment< StaticVector< Size, Real >, VectorExpression >::assignStatic( *this, expression ); return *this; } @@ -74,7 +66,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::additionStatic( *this, expression ); return *this; } @@ -83,7 +75,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression 
>::subtractionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::subtractionStatic( *this, expression ); return *this; } @@ -92,7 +84,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::multiplicationStatic( *this, expression ); return *this; } @@ -101,7 +93,7 @@ template< int Size, typename Real > __cuda_callable__ StaticVector< Size, Real >& StaticVector< Size, Real >::operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression ); + detail::VectorAssignmentWithOperation< StaticVector, VectorExpression >::divisionStatic( *this, expression ); return *this; } @@ -112,7 +104,7 @@ StaticVector< Size, Real >:: operator StaticVector< Size, OtherReal >() const { StaticVector< Size, OtherReal > aux; - StaticFor< 0, Size >::exec( Algorithms::detail::AssignArrayFunctor{}, aux.getData(), this->getData() ); + Algorithms::StaticFor< 0, Size >::exec( detail::AssignArrayFunctor{}, aux.getData(), this->getData() ); return aux; } diff --git a/src/TNL/Containers/Subrange.h b/src/TNL/Containers/Subrange.h index 08911855337817712c960047bf9688a7f134a752..17e02c45f96ff5be79bde0caf8692d25db75166e 100644 --- a/src/TNL/Containers/Subrange.h +++ b/src/TNL/Containers/Subrange.h @@ -16,7 +16,7 @@ #include <TNL/Assert.h> #include <TNL/String.h> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> namespace TNL { namespace Containers { @@ -54,11 +54,6 @@ public: end = 0; } - static String getType() - { - return "Subrange< " + TNL::getType< Index >() + " >"; - } - // Checks if a global index is in the set of local indices. 
__cuda_callable__ bool isLocal( Index i ) const @@ -127,7 +122,7 @@ protected: template< typename Index > std::ostream& operator<<( std::ostream& str, const Subrange< Index >& range ) { - return str << Subrange< Index >::getType() << "( " << range.getBegin() << ", " << range.getEnd() << " )"; + return str << getType< Subrange< Index > >() << "( " << range.getBegin() << ", " << range.getEnd() << " )"; } } // namespace Containers diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 42f8465978f7539bee9b2d7de731a6587a74b95f..be08266b61bc42555f9b78cd5471bce7f31f5b43 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -42,7 +42,6 @@ class Vector : public Array< Real, Device, Index, Allocator > { public: - /** * \brief Type of elements stored in this vector. */ @@ -67,16 +66,6 @@ public: */ using AllocatorType = Allocator; - /** - * \brief Defines the same vector type but allocated on host (CPU). - */ - using HostType = Vector< Real, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same vector type but allocated on CUDA device (GPU). - */ - using CudaType = Vector< Real, TNL::Devices::Cuda, Index >; - /** * \brief Compatible VectorView type. */ @@ -87,6 +76,16 @@ public: */ using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; + /** + * \brief A template which allows to quickly obtain a \ref Vector type with changed template parameters. 
+ */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index, + typename _Allocator = typename Allocators::Default< _Device >::template Allocator< _Real > > + using Self = Vector< _Real, _Device, _Index, _Allocator >; + + // constructors and assignment operators inherited from the class Array using Array< Real, Device, Index, Allocator >::Array; using Array< Real, Device, Index, Allocator >::operator=; @@ -121,16 +120,6 @@ public: */ Vector& operator=( Vector&& ) = default; - /** - * \brief Returns a \ref String representation of the vector type in C++ style. - */ - static String getType(); - - /** - * \brief Returns a \ref String representation of the vector type in C++ style. - */ - virtual String getTypeVirtual() const; - /** * \brief Returns a modifiable view of the vector. * @@ -255,75 +244,84 @@ public: Vector& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. 
*/ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). 
- * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. 
- * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 0468fc749135434ff5542dcf4ac60f239378eb41..5fdce0d09d2adb53b7c19e971fdf3b0a545891a5 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -27,31 +27,6 @@ Vector( const Vector& vector, { } -template< typename Real, - typename Device, - typename Index, - typename Allocator > -String -Vector< Real, Device, Index, Allocator >:: -getType() -{ - return String( "Containers::Vector< " ) + - TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; -} - -template< typename Real, - typename Device, - typename Index, - typename Allocator > -String -Vector< Real, Device, Index, Allocator >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename 
Real, typename Device, typename Index, @@ -107,8 +82,8 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< Vector, VectorExpression >::resize( *this, expression ); - Algorithms::VectorAssignment< Vector, VectorExpression >::assign( *this, expression ); + detail::VectorAssignment< Vector, VectorExpression >::resize( *this, expression ); + detail::VectorAssignment< Vector, VectorExpression >::assign( *this, expression ); return *this; } @@ -121,7 +96,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::addition( *this, expression ); return *this; } @@ -134,7 +109,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::subtraction( *this, expression ); return *this; } @@ -147,7 +122,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression ); + detail::VectorAssignmentWithOperation< Vector, VectorExpression >::multiplication( *this, expression ); return *this; } @@ -160,7 +135,7 @@ Vector< Real, Device, Index, Allocator >& Vector< Real, Device, Index, Allocator >:: operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression ); + 
detail::VectorAssignmentWithOperation< Vector, VectorExpression >::division( *this, expression ); return *this; } @@ -171,7 +146,7 @@ template< typename Real, template< Algorithms::ScanType Type > void Vector< Real, Device, Index, Allocator >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -186,7 +161,7 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -201,9 +176,9 @@ template< typename Real, typename VectorExpression > void Vector< Real, Device, Index, Allocator >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." ); } template< typename Real, @@ -215,9 +190,9 @@ template< typename Real, typename FlagsArray > void Vector< Real, Device, Index, Allocator >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." 
); } } // namespace Containers diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index ba43e74d819a2ee58d7ccd7b3bf2e7c54641c571..1a144ea5cde79951ee059f386859322eba18cf57 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -14,7 +14,7 @@ #include <TNL/Containers/ArrayView.h> #include <TNL/Containers/Expressions/ExpressionTemplates.h> -#include <TNL/Containers/Algorithms/Scan.h> +#include <TNL/Algorithms/Scan.h> namespace TNL { namespace Containers { @@ -39,7 +39,6 @@ class VectorView using BaseType = ArrayView< Real, Device, Index >; using NonConstReal = typename std::remove_const< Real >::type; public: - /** * \brief Type of elements stored in this vector. */ @@ -57,16 +56,6 @@ public: */ using IndexType = Index; - /** - * \brief Defines the same vector type but allocated on host (CPU). - */ - using HostType = VectorView< Real, TNL::Devices::Host, Index >; - - /** - * \brief Defines the same vector type but allocated on CUDA device (GPU). - */ - using CudaType = VectorView< Real, TNL::Devices::Cuda, Index >; - /** * \brief Compatible VectorView type. */ @@ -77,6 +66,15 @@ public: */ using ConstViewType = VectorView< std::add_const_t< Real >, Device, Index >; + /** + * \brief A template which allows to quickly obtain a \ref VectorView type with changed template parameters. + */ + template< typename _Real, + typename _Device = Device, + typename _Index = Index > + using Self = VectorView< _Real, _Device, _Index >; + + // constructors and assignment operators inherited from the class ArrayView using ArrayView< Real, Device, Index >::ArrayView; using ArrayView< Real, Device, Index >::operator=; @@ -97,11 +95,6 @@ public: VectorView( const ArrayView< Real_, Device, Index >& view ) : BaseType( view ) {} - /** - * \brief Returns a \ref String representation of the vector view type. - */ - static String getType(); - /** * \brief Returns a modifiable view of the vector view. 
* @@ -221,75 +214,84 @@ public: VectorView& operator/=( const VectorExpression& expression ); /** - * \brief Computes prefix sum of the vector view elements. + * \brief Computes the scan (prefix sum) of the vector elements. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive > - void prefixSum( IndexType begin = 0, IndexType end = 0 ); + void scan( IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of the vector view elements. + * \brief Computes the segmented scan (prefix sum) of the vector elements. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector view remain unchanged. Whole vector view is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam FlagsArray is an array type describing beginnings of the segments. 
- * - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename FlagsArray > - void segmentedPrefixSum( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes prefix sum of the vector expression. + * \brief Computes the scan (prefix sum) of the vector expression. * - * Computes prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * - * \param expression is the vector expression. - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. 
The default value is 0 + * which is, however, replaced with the array size. */ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression > - void prefixSum( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); + void scan( const VectorExpression& expression, IndexType begin = 0, IndexType end = 0 ); /** - * \brief Computes segmented prefix sum of a vector expression. + * \brief Computes the segmented scan (prefix sum) of a vector expression. * - * Computes segmented prefix sum for elements within the index range [ \e begin to \e end ). - * The other elements of this vector remain unchanged. Whole vector expression is assumed - * by default, i.e. when \e begin and \e end are set to zero. + * By default, segmented scan is computed for the whole vector. If \e begin + * or \e end is set to a non-zero value, only elements in the sub-interval + * `[begin, end)` are scanned. * - * \tparam Type tells the prefix sum type - either \e Inclusive of \e Exclusive. - * \tparam VectorExpression is the vector expression. - * \tparam FlagsArray is an array type describing beginnings of the segments. - * - * \param expression is the vector expression. - * \param flags is an array having `1` at the beginning of each segment and `0` on any other position - * \param begin beginning of the index range - * \param end end of the index range. + * \tparam Type The scan type - either \e Inclusive or \e Exclusive. + * + * \param expression A vector expression for which scan is computed and + * stored in this vector. + * \param flags A binary array where ones indicate the beginning of each + * segment. + * \param begin The beginning of the vector sub-interval. It is 0 by + * default. + * \param end The end of the vector sub-interval. The default value is 0 + * which is, however, replaced with the array size. 
*/ template< Algorithms::ScanType Type = Algorithms::ScanType::Inclusive, typename VectorExpression, typename FlagsArray > - void segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); + void segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin = 0, IndexType end = 0 ); }; } // namespace Containers diff --git a/src/TNL/Containers/VectorView.hpp b/src/TNL/Containers/VectorView.hpp index 7c342703bcd526307b9cb85c8bde874a913357cc..2c1cd02c8163db83760907a50aeafaf0c8e5404d 100644 --- a/src/TNL/Containers/VectorView.hpp +++ b/src/TNL/Containers/VectorView.hpp @@ -11,25 +11,12 @@ #pragma once #include <TNL/Containers/VectorView.h> -#include <TNL/Containers/Algorithms/VectorAssignment.h> +#include <TNL/Containers/detail/VectorAssignment.h> #include <TNL/Exceptions/NotImplementedError.h> namespace TNL { namespace Containers { -template< typename Real, - typename Device, - typename Index > -String -VectorView< Real, Device, Index >:: -getType() -{ - return String( "Containers::VectorView< " ) + - TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename Real, typename Device, typename Index > @@ -63,7 +50,7 @@ template< typename Real, VectorView< Real, Device, Index >& VectorView< Real, Device, Index >::operator=( const VectorExpression& expression ) { - Algorithms::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression ); + detail::VectorAssignment< VectorView, VectorExpression >::assign( *this, expression ); return *this; } @@ -75,7 +62,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator+=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::addition( *this, expression ); return *this; } @@ -87,7 
+74,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator-=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::subtraction( *this, expression ); return *this; } @@ -99,7 +86,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator*=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::multiplication( *this, expression ); return *this; } @@ -111,7 +98,7 @@ VectorView< Real, Device, Index >& VectorView< Real, Device, Index >:: operator/=( const VectorExpression& expression ) { - Algorithms::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression ); + detail::VectorAssignmentWithOperation< VectorView, VectorExpression >::division( *this, expression ); return *this; } @@ -121,7 +108,7 @@ template< typename Real, template< Algorithms::ScanType Type > void VectorView< Real, Device, Index >:: -prefixSum( IndexType begin, IndexType end ) +scan( IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -135,7 +122,7 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( FlagsArray& flags, IndexType begin, IndexType end ) { if( end == 0 ) end = this->getSize(); @@ -149,9 +136,9 @@ template< typename Real, typename VectorExpression > void VectorView< Real, Device, Index >:: -prefixSum( const VectorExpression& expression, IndexType begin, IndexType end ) +scan( const VectorExpression& expression, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector 
expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Scan (prefix sum) with vector expressions is not implemented." ); } template< typename Real, @@ -162,9 +149,9 @@ template< typename Real, typename FlagsArray > void VectorView< Real, Device, Index >:: -segmentedPrefixSum( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) +segmentedScan( const VectorExpression& expression, FlagsArray& flags, IndexType begin, IndexType end ) { - throw Exceptions::NotImplementedError( "Prefix sum with vector expressions is not implemented." ); + throw Exceptions::NotImplementedError( "Segmented scan (prefix sum) with vector expressions is not implemented." ); } } // namespace Containers diff --git a/src/TNL/Containers/Algorithms/ArrayAssignment.h b/src/TNL/Containers/detail/ArrayAssignment.h similarity index 85% rename from src/TNL/Containers/Algorithms/ArrayAssignment.h rename to src/TNL/Containers/detail/ArrayAssignment.h index 9a67a36b9190d3243332bf985ac978fcb5b7cae9..e6671bb2ced00cc2124d3019b07080ae379d986a 100644 --- a/src/TNL/Containers/Algorithms/ArrayAssignment.h +++ b/src/TNL/Containers/detail/ArrayAssignment.h @@ -11,11 +11,12 @@ #pragma once #include <TNL/TypeTraits.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { template< typename Array, typename T, @@ -39,7 +40,7 @@ struct ArrayAssignment< Array, T, true > { TNL_ASSERT_EQ( a.getSize(), t.getSize(), "The sizes of the arrays must be equal." 
); if( t.getSize() > 0 ) // we allow even assignment of empty arrays - ArrayOperations< typename Array::DeviceType, typename T::DeviceType >::template + Algorithms::MultiDeviceMemoryOperations< typename Array::DeviceType, typename T::DeviceType >::template copy< typename Array::ValueType, typename T::ValueType, typename Array::IndexType > ( a.getArrayData(), t.getArrayData(), t.getSize() ); } @@ -60,12 +61,12 @@ struct ArrayAssignment< Array, T, false > static void assign( Array& a, const T& t ) { TNL_ASSERT_FALSE( a.empty(), "Cannot assign value to empty array." ); - ArrayOperations< typename Array::DeviceType >::template + Algorithms::MemoryOperations< typename Array::DeviceType >::template set< typename Array::ValueType, typename Array::IndexType > ( a.getArrayData(), ( typename Array::ValueType ) t, a.getSize() ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayIO.h b/src/TNL/Containers/detail/ArrayIO.h similarity index 78% rename from src/TNL/Containers/Algorithms/ArrayIO.h rename to src/TNL/Containers/detail/ArrayIO.h index 35d79055896bf8570e13dfcde63de3a521428308..8844a554f1915379559f274d20f3dddea692c966 100644 --- a/src/TNL/Containers/Algorithms/ArrayIO.h +++ b/src/TNL/Containers/detail/ArrayIO.h @@ -14,29 +14,29 @@ #include <TNL/Object.h> #include <TNL/File.h> +#include <TNL/TypeInfo.h> namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { template< typename Value, - typename Device, typename Index, + typename Allocator, bool Elementwise = std::is_base_of< Object, Value >::value > struct ArrayIO {}; template< typename Value, - typename Device, - typename Index > -struct ArrayIO< Value, Device, Index, true > + typename Index, + typename Allocator > +struct ArrayIO< Value, Index, Allocator, true > { static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - 
Devices::Host::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", [any_device], " + + TNL::getSerializationType< Index >() + ", [any_allocator] >"; } static void save( File& file, @@ -73,16 +73,15 @@ struct ArrayIO< Value, Device, Index, true > }; template< typename Value, - typename Device, - typename Index > -struct ArrayIO< Value, Device, Index, false > + typename Index, + typename Allocator > +struct ArrayIO< Value, Index, Allocator, false > { static String getSerializationType() { return String( "Containers::Array< " ) + - TNL::getType< Value >() + ", " + - Devices::Host::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; + TNL::getSerializationType< Value >() + ", [any_device], " + + TNL::getSerializationType< Index >() + ", [any_allocator] >"; } static void save( File& file, @@ -93,7 +92,7 @@ struct ArrayIO< Value, Device, Index, false > return; try { - file.save< Value, Value, Device >( data, elements ); + file.save< Value, Value, Allocator >( data, elements ); } catch(...) { @@ -109,7 +108,7 @@ struct ArrayIO< Value, Device, Index, false > return; try { - file.load< Value, Value, Device >( data, elements ); + file.load< Value, Value, Allocator >( data, elements ); } catch(...) 
{ @@ -118,6 +117,6 @@ struct ArrayIO< Value, Device, Index, false > } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h b/src/TNL/Containers/detail/StaticArrayAssignment.h similarity index 63% rename from src/TNL/Containers/Algorithms/StaticArrayAssignment.h rename to src/TNL/Containers/detail/StaticArrayAssignment.h index 32a59e98c594e0875ce963dc0de57751c66b4bc7..9a8d7d3eeb7bdbcfd9814ad3754d4ba9131004b5 100644 --- a/src/TNL/Containers/Algorithms/StaticArrayAssignment.h +++ b/src/TNL/Containers/detail/StaticArrayAssignment.h @@ -11,33 +11,31 @@ #pragma once #include <TNL/TypeTraits.h> -#include <TNL/StaticFor.h> +#include <TNL/Algorithms/StaticFor.h> namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { - namespace detail { - struct AssignArrayFunctor - { - template< typename LeftValue, typename RightValue > - __cuda_callable__ - void operator()( int i, LeftValue& data, const RightValue& v ) const - { - data[ i ] = v[ i ]; - } - }; +struct AssignArrayFunctor +{ + template< typename LeftValue, typename RightValue > + __cuda_callable__ + void operator()( int i, LeftValue& data, const RightValue& v ) const + { + data[ i ] = v[ i ]; + } +}; - struct AssignValueFunctor - { - template< typename LeftValue, typename RightValue > - __cuda_callable__ - void operator()( int i, LeftValue& data, const RightValue& v ) const - { - data[ i ] = v; - } - }; - } // namespace detail +struct AssignValueFunctor +{ + template< typename LeftValue, typename RightValue > + __cuda_callable__ + void operator()( int i, LeftValue& data, const RightValue& v ) const + { + data[ i ] = v; + } +}; template< typename StaticArray, typename T, @@ -55,7 +53,7 @@ struct StaticArrayAssignment< StaticArray, T, true > static void assign( StaticArray& a, const T& v ) { static_assert( StaticArray::getSize() == T::getSize(), "Cannot assign static arrays with different 
size." ); - StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignArrayFunctor{}, a.getData(), v ); + Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignArrayFunctor{}, a.getData(), v ); } }; @@ -70,10 +68,10 @@ struct StaticArrayAssignment< StaticArray, T, false > __cuda_callable__ static void assign( StaticArray& a, const T& v ) { - StaticFor< 0, StaticArray::getSize() >::exec( detail::AssignValueFunctor{}, a, v ); + Algorithms::StaticFor< 0, StaticArray::getSize() >::exec( AssignValueFunctor{}, a, v ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/VectorAssignment.h b/src/TNL/Containers/detail/VectorAssignment.h similarity index 91% rename from src/TNL/Containers/Algorithms/VectorAssignment.h rename to src/TNL/Containers/detail/VectorAssignment.h index c861579f4e90810b024d2d84b5ea3c2eeaf92234..fa778a2480ad36c169aec903195ff2566f766359 100644 --- a/src/TNL/Containers/Algorithms/VectorAssignment.h +++ b/src/TNL/Containers/detail/VectorAssignment.h @@ -11,11 +11,11 @@ #pragma once #include <TNL/TypeTraits.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Containers { -namespace Algorithms { +namespace detail { /** * \brief Vector assignment @@ -68,7 +68,7 @@ struct VectorAssignment< Vector, T, true > { data[ i ] = t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); } }; @@ -103,7 +103,7 @@ struct VectorAssignment< Vector, T, false > { data[ i ] = t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), assignment ); } }; @@ -169,7 +169,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] += t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType 
) 0, v.getSize(), add ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); } __cuda_callable__ @@ -194,7 +194,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] -= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); } __cuda_callable__ @@ -219,7 +219,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] *= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); } __cuda_callable__ @@ -244,7 +244,7 @@ struct VectorAssignmentWithOperation< Vector, T, true, false > { data[ i ] /= t[ i ]; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); } }; @@ -275,7 +275,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] += t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), add ); } __cuda_callable__ @@ -297,7 +297,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] -= t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), subtract ); } __cuda_callable__ @@ -319,7 +319,7 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] *= t; }; - ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), multiply ); } __cuda_callable__ @@ -341,10 +341,10 @@ struct VectorAssignmentWithOperation< Vector, T, false, false > { data[ i ] /= t; }; - ParallelFor< DeviceType >::exec( 
( IndexType ) 0, v.getSize(), divide ); + Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, v.getSize(), divide ); } }; -} // namespace Algorithms +} // namespace detail } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/ndarray/BoundaryExecutors.h b/src/TNL/Containers/ndarray/BoundaryExecutors.h index e4cd93705c7ae83dd36378662fa67b2e618f66eb..cf06ab1511e2179392ecee744d9589fc3ac74725 100644 --- a/src/TNL/Containers/ndarray/BoundaryExecutors.h +++ b/src/TNL/Containers/ndarray/BoundaryExecutors.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/ndarray/Meta.h> #include <TNL/Containers/ndarray/SizesHolder.h> @@ -225,12 +225,12 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 3 > > const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const auto end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, skipBegin2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipEnd2, begin1, begin0, end2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, begin1, begin0, skipEnd2, skipBegin1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipEnd1, begin0, skipEnd2, end1, end0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0, skipEnd2, skipEnd1, skipBegin0, kernel, f ); - ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2, skipEnd1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, skipBegin2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipEnd2, begin1, begin0, end2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, begin1, begin0, skipEnd2, skipBegin1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipEnd1, begin0, skipEnd2, end1, end0, kernel, f ); + 
Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, begin0, skipEnd2, skipEnd1, skipBegin0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( skipBegin2, skipBegin1, skipEnd0, skipEnd2, skipEnd1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -291,10 +291,10 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 2 > > const auto end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const auto end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); - ParallelFor2D< Device >::exec( begin1, begin0, skipBegin1, end0, kernel, f ); - ParallelFor2D< Device >::exec( skipEnd1, begin0, end1, end0, kernel, f ); - ParallelFor2D< Device >::exec( skipBegin1, begin0, skipEnd1, skipBegin0, kernel, f ); - ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( begin1, begin0, skipBegin1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipEnd1, begin0, end1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipBegin1, begin0, skipEnd1, skipBegin0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( skipBegin1, skipEnd0, skipEnd1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -343,8 +343,8 @@ struct ParallelBoundaryExecutor< Permutation, Device, IndexTag< 1 > > const auto skipEnd = skipEnds.template getSize< get< 0 >( Permutation{} ) >(); const auto end = ends.template getSize< get< 0 >( Permutation{} ) >(); - ParallelFor< Device >::exec( begin, skipBegin, f ); - ParallelFor< Device >::exec( skipEnd, end, f ); + Algorithms::ParallelFor< Device >::exec( begin, skipBegin, f ); + Algorithms::ParallelFor< Device >::exec( skipEnd, end, f ); } }; diff --git a/src/TNL/Containers/ndarray/Executors.h b/src/TNL/Containers/ndarray/Executors.h index eff2adff3fa171460f09b9bf29ee13f90b7fcdb1..2d3db794d1a1ffb204723766d7224fcb0ac2884a 100644 --- a/src/TNL/Containers/ndarray/Executors.h +++ 
b/src/TNL/Containers/ndarray/Executors.h @@ -12,7 +12,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/ndarray/Meta.h> #include <TNL/Containers/ndarray/SizesHolder.h> @@ -139,7 +139,7 @@ struct ParallelExecutorDeviceDispatch const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); + Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); } }; @@ -168,7 +168,7 @@ struct ParallelExecutorDeviceDispatch< Permutation, Devices::Cuda > const Index end0 = ends.template getSize< get< Ends::getDimension() - 3 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< Ends::getDimension() - 2 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< Ends::getDimension() - 1 >( Permutation{} ) >(); - ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( begin2, begin1, begin0, end2, end1, end0, kernel ); } }; @@ -214,7 +214,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 3 > > const Index end0 = ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); const Index end2 = ends.template getSize< get< 2 >( Permutation{} ) >(); - ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f ); + Algorithms::ParallelFor3D< Device >::exec( begin2, begin1, begin0, end2, end1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -265,7 +265,7 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 2 > > const Index begin1 = begins.template getSize< get< 1 >( Permutation{} ) >(); const Index end0 = 
ends.template getSize< get< 0 >( Permutation{} ) >(); const Index end1 = ends.template getSize< get< 1 >( Permutation{} ) >(); - ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f ); + Algorithms::ParallelFor2D< Device >::exec( begin1, begin0, end1, end0, kernel, f ); } template< typename __Device, typename = void > @@ -312,8 +312,8 @@ struct ParallelExecutor< Permutation, Device, IndexTag< 1 > > const Index begin = begins.template getSize< get< 0 >( Permutation{} ) >(); const Index end = ends.template getSize< get< 0 >( Permutation{} ) >(); -// ParallelFor< Device >::exec( begin, end, kernel ); - ParallelFor< Device >::exec( begin, end, f ); +// Algorithms::ParallelFor< Device >::exec( begin, end, kernel ); + Algorithms::ParallelFor< Device >::exec( begin, end, f ); } }; diff --git a/src/TNL/Containers/ndarray/SizesHolder.h b/src/TNL/Containers/ndarray/SizesHolder.h index c3334e19b2c4e1ff5db6706fc25e264106fea691..1375683b28d9a04ab8d0888f6e79274d79f62fe0 100644 --- a/src/TNL/Containers/ndarray/SizesHolder.h +++ b/src/TNL/Containers/ndarray/SizesHolder.h @@ -13,8 +13,8 @@ #pragma once #include <TNL/Assert.h> -#include <TNL/Devices/CudaCallable.h> -#include <TNL/TemplateStaticFor.h> +#include <TNL/Cuda/CudaCallable.h> +#include <TNL/Algorithms/TemplateStaticFor.h> #include <TNL/Containers/ndarray/Meta.h> @@ -231,7 +231,7 @@ SizesHolder< Index, sizes... > operator+( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs ) { SizesHolder< Index, sizes... > result; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorPlusHelper >::execHost( result, lhs, rhs ); return result; } @@ -242,7 +242,7 @@ SizesHolder< Index, sizes... > operator-( const SizesHolder< Index, sizes... >& lhs, const OtherHolder& rhs ) { SizesHolder< Index, sizes... 
> result; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes), __ndarray_impl::SizesHolerOperatorMinusHelper >::execHost( result, lhs, rhs ); return result; } @@ -295,9 +295,9 @@ template< typename Index, std::ostream& operator<<( std::ostream& str, const SizesHolder< Index, sizes... >& holder ) { str << "SizesHolder< "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, holder ); str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >( "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); str << holder.template getSize< sizeof...(sizes) - 1 >() << " )"; return str; } @@ -360,10 +360,10 @@ template< typename Index, std::ostream& operator<<( std::ostream& str, const __ndarray_impl::LocalBeginsHolder< SizesHolder< Index, sizes... >, ConstValue >& holder ) { str << "LocalBeginsHolder< SizesHolder< "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... >) holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderStaticSizePrinter >::execHost( str, (SizesHolder< Index, sizes... 
>) holder ); str << holder.template getStaticSize< sizeof...(sizes) - 1 >() << " >, "; str << ConstValue << " >( "; - TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); + Algorithms::TemplateStaticFor< std::size_t, 0, sizeof...(sizes) - 1, __ndarray_impl::SizesHolderSizePrinter >::execHost( str, holder ); str << holder.template getSize< sizeof...(sizes) - 1 >() << " )"; return str; } diff --git a/src/TNL/Containers/ndarray/SizesHolderHelpers.h b/src/TNL/Containers/ndarray/SizesHolderHelpers.h index 9d1c0d439fe69129058d998679492a4338fc7ba3..d06c9a7a0980780c0a875bb5ef8e59a641c3810e 100644 --- a/src/TNL/Containers/ndarray/SizesHolderHelpers.h +++ b/src/TNL/Containers/ndarray/SizesHolderHelpers.h @@ -15,7 +15,7 @@ #include <algorithm> #include <TNL/Assert.h> -#include <TNL/TemplateStaticFor.h> +#include <TNL/Algorithms/TemplateStaticFor.h> #include <TNL/Containers/ndarray/Meta.h> namespace TNL { @@ -227,7 +227,7 @@ bool sizesWeakCompare( const SizesHolder1& sizes1, const SizesHolder2& sizes2 ) static_assert( SizesHolder1::getDimension() == SizesHolder2::getDimension(), "Cannot compare sizes of different dimensions." ); bool result = true; - TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result ); + Algorithms::TemplateStaticFor< std::size_t, 0, SizesHolder1::getDimension(), WeakCompareHelper >::exec( sizes1, sizes2, result ); return result; } diff --git a/src/TNL/Cuda/CheckDevice.h b/src/TNL/Cuda/CheckDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..c857d8dd6ab8129fd2b1cac4e967831207296153 --- /dev/null +++ b/src/TNL/Cuda/CheckDevice.h @@ -0,0 +1,40 @@ +/*************************************************************************** + CheckDevice.h - description + ------------------- + begin : Aug 18, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Exceptions/CudaRuntimeError.h> + +namespace TNL { +namespace Cuda { + +#ifdef HAVE_CUDA + /**** + * I do not know why, but it is more reliable to pass the error code instead + * of calling cudaGetLastError() inside the function. + * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. + */ + inline void checkDevice( const char* file_name, int line, cudaError error ) + { + if( error != cudaSuccess ) + throw Exceptions::CudaRuntimeError( error, file_name, line ); + } +#else + inline void checkDevice() {} +#endif + +} // namespace Cuda +} // namespace TNL + +#ifdef HAVE_CUDA +#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice( __FILE__, __LINE__, cudaGetLastError() ) +#else +#define TNL_CHECK_CUDA_DEVICE ::TNL::Cuda::checkDevice() +#endif diff --git a/src/TNL/Devices/CudaCallable.h b/src/TNL/Cuda/CudaCallable.h similarity index 76% rename from src/TNL/Devices/CudaCallable.h rename to src/TNL/Cuda/CudaCallable.h index f9311443f12a0c85fb6fba9ebaf07ca47736b030..5cd3e8fbbe51abe0bd7dc525c165990b734ef388 100644 --- a/src/TNL/Devices/CudaCallable.h +++ b/src/TNL/Cuda/CudaCallable.h @@ -12,19 +12,14 @@ // The __cuda_callable__ macro has to be in a separate header file to avoid // infinite loops by the #include directives. -// -// For example, the implementation of Devices::Cuda needs TNL_ASSERT_* -// macros, which need __cuda_callable__ functions. /*** * This macro serves for definition of function which are supposed to be called * even from device. If HAVE_CUDA is defined, the __cuda_callable__ function * is compiled for both CPU and GPU. If HAVE_CUDA is not defined, this macro has - * no effect. Support for Intel Xeon Phi is now in "hibernated" state. + * no effect. 
// Static query interface for CUDA device properties: device count, the active
// device, name, compute capability, clock rates, memory sizes, ECC state and
// multiprocessor/core/register counts. Implementations (which call into the
// CUDA runtime) live in <TNL/Cuda/DeviceInfo.hpp>.
struct DeviceInfo
{
   static int getNumberOfDevices();

   static int getActiveDevice();

   static String getDeviceName( int deviceNum );

   // Compute-capability major/minor version of the given device.
   static int getArchitectureMajor( int deviceNum );

   static int getArchitectureMinor( int deviceNum );

   static int getClockRate( int deviceNum );

   static std::size_t getGlobalMemory( int deviceNum );

   // Free memory on the currently active device (no deviceNum parameter).
   static std::size_t getFreeGlobalMemory();

   static int getMemoryClockRate( int deviceNum );

   static bool getECCEnabled( int deviceNum );

   static int getCudaMultiprocessors( int deviceNum );

   // Cores per SM, derived from the compute capability.
   static int getCudaCoresPerMultiprocessors( int deviceNum );

   // Total cores = multiprocessors * cores-per-multiprocessor.
   static int getCudaCores( int deviceNum );

   static int getRegistersPerMultiprocessor( int deviceNum );
};
a/src/TNL/Devices/CudaDeviceInfo_impl.h +++ b/src/TNL/Cuda/DeviceInfo.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - CudaDeviceInfo_impl.h - description + DeviceInfo.hpp - description ------------------- begin : Jun 21, 2015 copyright : (C) 2007 by Tomas Oberhuber @@ -12,14 +12,14 @@ #include <unordered_map> -#include <TNL/Devices/CudaDeviceInfo.h> +#include <TNL/Cuda/DeviceInfo.h> #include <TNL/Exceptions/CudaSupportMissing.h> namespace TNL { -namespace Devices { +namespace Cuda { inline int -CudaDeviceInfo:: +DeviceInfo:: getNumberOfDevices() { #ifdef HAVE_CUDA @@ -32,7 +32,7 @@ getNumberOfDevices() } inline int -CudaDeviceInfo:: +DeviceInfo:: getActiveDevice() { #ifdef HAVE_CUDA @@ -45,7 +45,7 @@ getActiveDevice() } inline String -CudaDeviceInfo:: +DeviceInfo:: getDeviceName( int deviceNum ) { #ifdef HAVE_CUDA @@ -58,7 +58,7 @@ getDeviceName( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getArchitectureMajor( int deviceNum ) { #ifdef HAVE_CUDA @@ -71,7 +71,7 @@ getArchitectureMajor( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getArchitectureMinor( int deviceNum ) { #ifdef HAVE_CUDA @@ -84,7 +84,7 @@ getArchitectureMinor( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getClockRate( int deviceNum ) { #ifdef HAVE_CUDA @@ -96,8 +96,8 @@ getClockRate( int deviceNum ) #endif } -inline size_t -CudaDeviceInfo:: +inline std::size_t +DeviceInfo:: getGlobalMemory( int deviceNum ) { #ifdef HAVE_CUDA @@ -109,13 +109,13 @@ getGlobalMemory( int deviceNum ) #endif } -inline size_t -CudaDeviceInfo:: +inline std::size_t +DeviceInfo:: getFreeGlobalMemory() { #ifdef HAVE_CUDA - size_t free = 0; - size_t total = 0; + std::size_t free = 0; + std::size_t total = 0; cudaMemGetInfo( &free, &total ); return free; #else @@ -124,7 +124,7 @@ getFreeGlobalMemory() } inline int -CudaDeviceInfo:: +DeviceInfo:: getMemoryClockRate( int deviceNum ) { #ifdef HAVE_CUDA @@ -137,7 +137,7 @@ 
getMemoryClockRate( int deviceNum ) } inline bool -CudaDeviceInfo:: +DeviceInfo:: getECCEnabled( int deviceNum ) { #ifdef HAVE_CUDA @@ -150,7 +150,7 @@ getECCEnabled( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getCudaMultiprocessors( int deviceNum ) { #ifdef HAVE_CUDA @@ -169,12 +169,12 @@ getCudaMultiprocessors( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getCudaCoresPerMultiprocessors( int deviceNum ) { #ifdef HAVE_CUDA - int major = CudaDeviceInfo::getArchitectureMajor( deviceNum ); - int minor = CudaDeviceInfo::getArchitectureMinor( deviceNum ); + int major = DeviceInfo::getArchitectureMajor( deviceNum ); + int minor = DeviceInfo::getArchitectureMinor( deviceNum ); switch( major ) { case 1: // Tesla generation, G80, G8x, G9x classes @@ -209,19 +209,19 @@ getCudaCoresPerMultiprocessors( int deviceNum ) } inline int -CudaDeviceInfo:: +DeviceInfo:: getCudaCores( int deviceNum ) { #ifdef HAVE_CUDA - return CudaDeviceInfo::getCudaMultiprocessors( deviceNum ) * - CudaDeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); + return DeviceInfo::getCudaMultiprocessors( deviceNum ) * + DeviceInfo::getCudaCoresPerMultiprocessors( deviceNum ); #else throw Exceptions::CudaSupportMissing(); #endif } inline int -CudaDeviceInfo:: +DeviceInfo:: getRegistersPerMultiprocessor( int deviceNum ) { #ifdef HAVE_CUDA @@ -239,5 +239,5 @@ getRegistersPerMultiprocessor( int deviceNum ) #endif } -} // namespace Devices +} // namespace Cuda } // namespace TNL diff --git a/src/TNL/Cuda/LaunchHelpers.h b/src/TNL/Cuda/LaunchHelpers.h new file mode 100644 index 0000000000000000000000000000000000000000..6e5d3c9757601afaa5f9d9c2be45593298f7ab12 --- /dev/null +++ b/src/TNL/Cuda/LaunchHelpers.h @@ -0,0 +1,170 @@ +/*************************************************************************** + LaunchHelpers.h - description + ------------------- + begin : Aug 19, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
// Hard-coded upper bound on a single CUDA grid dimension used by TNL's
// kernel-launch helpers below.
inline constexpr int getMaxGridSize()
{
   return 65535;
}

inline constexpr int getMaxBlockSize()
{
   return 1024;
}

inline constexpr int getWarpSize()
{
   return 32;
}

// When we transfer data between the GPU and the CPU we use 1 MiB buffer. This
// size should ensure good performance.
// We use the same buffer size even for retyping data during IO operations.
inline constexpr int getTransferBufferSize()
{
   return 1 << 20;
}

#ifdef HAVE_CUDA
// Global 1D thread index; large launches are split into several grids of at
// most `gridSize` blocks each, `gridIdx` selects the current grid.
__device__ inline int getGlobalThreadIdx( const int gridIdx = 0,
                                          const int gridSize = getMaxGridSize() )
{
   return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x;
}

__device__ inline int getGlobalThreadIdx_x( const dim3& gridIdx )
{
   return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
}

__device__ inline int getGlobalThreadIdx_y( const dim3& gridIdx )
{
   return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y;
}

__device__ inline int getGlobalThreadIdx_z( const dim3& gridIdx )
{
   return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z;
}
#endif

// Number of blocks needed to cover `threads` threads, rounded up.
inline int getNumberOfBlocks( const int threads,
                              const int blockSize )
{
   return roundUpDivision( threads, blockSize );
}

// Number of grids needed to cover `blocks` blocks, rounded up.
inline int getNumberOfGrids( const int blocks,
                             const int gridSize = getMaxGridSize() )
{
   return roundUpDivision( blocks, gridSize );
}

#ifdef HAVE_CUDA
// Computes blocksCount and gridsCount from the requested thread counts in
// each dimension. A dimension with 0 requested threads still gets one block.
inline void setupThreads( const dim3& blockSize,
                          dim3& blocksCount,
                          dim3& gridsCount,
                          long long int xThreads,
                          long long int yThreads = 0,
                          long long int zThreads = 0 )
{
   blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) );
   blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) );
   blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) );

   /****
    * TODO: Fix the following:
    * I do not know how to get max grid size in kernels :(
    *
    * Also, querying it via cudaGetDeviceProperties( &properties, currentDevice )
    * and using properties.maxGridSize[ 0..2 ] is very slow, so the hard-coded
    * getMaxGridSize() is used instead.
    */
   gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 );
   gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 );
   gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 );
}

// Sets the grid size for iteration `gridIdx` when iterating over several
// grids: the full getMaxGridSize() for all but the last grid, and the
// remainder blocksCount.? % getMaxGridSize() for the last one.
// NOTE(review): when blocksCount.? is an exact multiple of getMaxGridSize(),
// the last grid gets size 0 -- confirm callers never launch such a grid.
inline void setupGrid( const dim3& blocksCount,
                       const dim3& gridsCount,
                       const dim3& gridIdx,
                       dim3& gridSize )
{
   /* TODO: querying properties.maxGridSize via
      cudaGetDevice( &currentDevice ) + cudaGetDeviceProperties() would be
      correct for any device, but it is extremely slow, so the hard-coded
      getMaxGridSize() is used instead. */

   if( gridIdx.x < gridsCount.x - 1 )
      gridSize.x = getMaxGridSize();
   else
      gridSize.x = blocksCount.x % getMaxGridSize();

   if( gridIdx.y < gridsCount.y - 1 )
      gridSize.y = getMaxGridSize();
   else
      gridSize.y = blocksCount.y % getMaxGridSize();

   if( gridIdx.z < gridsCount.z - 1 )
      gridSize.z = getMaxGridSize();
   else
      gridSize.z = blocksCount.z % getMaxGridSize();
}

// Formats a dim3 as "( x, y, z )".
inline std::ostream& operator<<( std::ostream& str, const dim3& d )
{
   str << "( " << d.x << ", " << d.y << ", " << d.z << " )";
   return str;
}

// Debug helper: prints the full launch configuration to `str`.
inline void printThreadsSetup( const dim3& blockSize,
                               const dim3& blocksCount,
                               const dim3& gridSize,
                               const dim3& gridsCount,
                               std::ostream& str = std::cout )
{
   str << "Block size: " << blockSize << std::endl
       << " Blocks count: " << blocksCount << std::endl
       << " Grid size: " << gridSize << std::endl
       << " Grids count: " << gridsCount << std::endl;
}
#endif
(C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <iostream> + +#include <TNL/Cuda/CheckDevice.h> +#include <TNL/Exceptions/CudaSupportMissing.h> +#include <TNL/Exceptions/CudaBadAlloc.h> + +namespace TNL { +namespace Cuda { + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations hould be used instead.")]] +ObjectType* passToDevice( const ObjectType& object ) +{ +#ifdef HAVE_CUDA + ObjectType* deviceObject; + if( cudaMalloc( ( void** ) &deviceObject, + ( size_t ) sizeof( ObjectType ) ) != cudaSuccess ) + throw Exceptions::CudaBadAlloc(); + if( cudaMemcpy( ( void* ) deviceObject, + ( void* ) &object, + sizeof( ObjectType ), + cudaMemcpyHostToDevice ) != cudaSuccess ) + { + TNL_CHECK_CUDA_DEVICE; + cudaFree( ( void* ) deviceObject ); + TNL_CHECK_CUDA_DEVICE; + return 0; + } + return deviceObject; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations hould be used instead.")]] +ObjectType passFromDevice( const ObjectType* object ) +{ +#ifdef HAVE_CUDA + ObjectType aux; + cudaMemcpy( ( void* ) aux, + ( void* ) &object, + sizeof( ObjectType ), + cudaMemcpyDeviceToHost ); + TNL_CHECK_CUDA_DEVICE; + return aux; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations hould be used instead.")]] +void passFromDevice( const ObjectType* deviceObject, + ObjectType& hostObject ) +{ +#ifdef HAVE_CUDA + cudaMemcpy( ( void* ) &hostObject, + ( void* ) deviceObject, + sizeof( ObjectType ), + cudaMemcpyDeviceToHost ); + TNL_CHECK_CUDA_DEVICE; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +[[deprecated("Allocators and MemoryOperations hould be used instead.")]] +void 
freeFromDevice( ObjectType* deviceObject ) +{ +#ifdef HAVE_CUDA + cudaFree( ( void* ) deviceObject ); + TNL_CHECK_CUDA_DEVICE; +#else + throw Exceptions::CudaSupportMissing(); +#endif +} + +template< typename ObjectType > +void print( const ObjectType* deviceObject, std::ostream& str = std::cout ) +{ +#ifdef HAVE_CUDA + ObjectType hostObject; + passFromDevice( deviceObject, hostObject ); + str << hostObject; +#endif +} + +} // namespace Cuda +} // namespace TNL diff --git a/src/TNL/CudaSharedMemory.h b/src/TNL/Cuda/SharedMemory.h similarity index 78% rename from src/TNL/CudaSharedMemory.h rename to src/TNL/Cuda/SharedMemory.h index ec9a43c207fc7962f36ffb40ad0af71973b76868..29851952c01c86356e6872511f90496358b55152 100644 --- a/src/TNL/CudaSharedMemory.h +++ b/src/TNL/Cuda/SharedMemory.h @@ -1,5 +1,5 @@ /*************************************************************************** - CudaSharedMemory.h - description + SharedMemory.h - description ------------------- begin : Oct 18, 2017 copyright : (C) 2017 by Tomas Oberhuber et al. 
// Returns a typed pointer to the kernel's dynamic shared memory, using the
// SharedMemory<T> specializations (8/16/32/64-bit alignment) via their
// implicit conversion operator. The primary SharedMemory template is only
// declared, so unsupported alignments fail at compile time.
template< typename T >
__device__ inline T* getSharedMemory()
{
   return SharedMemory< T >{};
}

// helper functions for indexing shared memory
inline constexpr int getNumberOfSharedMemoryBanks()
{
   return 32;
}

// Spreads consecutive indices across shared-memory banks by inserting one
// extra slot per bank-count stride.
template< typename Index >
__device__ Index getInterleaving( const Index index )
{
   return index + index / Cuda::getNumberOfSharedMemoryBanks();
}
src/TNL/Cuda/StreamPool.h index 1dd2b7907fe39b53e331b0147fff1cabe16424ef..59bf38a5791d2ae9f381e831559c5caa7788567a 100644 --- a/src/TNL/CudaStreamPool.h +++ b/src/TNL/Cuda/StreamPool.h @@ -1,5 +1,5 @@ /*************************************************************************** - CudaStreamPool.h - description + StreamPool.h - description ------------------- begin : Oct 14, 2016 copyright : (C) 2016 by Tomas Oberhuber et al. @@ -15,22 +15,20 @@ #include <stdlib.h> #include <unordered_map> -#include <TNL/Devices/Host.h> -#include <TNL/Devices/Cuda.h> - namespace TNL { +namespace Cuda { #ifdef HAVE_CUDA -class CudaStreamPool +class StreamPool { public: // stop the compiler generating methods of copy the object - CudaStreamPool( CudaStreamPool const& copy ) = delete; - CudaStreamPool& operator=( CudaStreamPool const& copy ) = delete; + StreamPool( StreamPool const& copy ) = delete; + StreamPool& operator=( StreamPool const& copy ) = delete; - inline static CudaStreamPool& getInstance() + inline static StreamPool& getInstance() { - static CudaStreamPool instance; + static StreamPool instance; return instance; } @@ -47,14 +45,14 @@ class CudaStreamPool private: // private constructor of the singleton - inline CudaStreamPool() + inline StreamPool() { - atexit( CudaStreamPool::free_atexit ); + atexit( StreamPool::free_atexit ); } inline static void free_atexit( void ) { - CudaStreamPool::getInstance().free(); + StreamPool::getInstance().free(); } protected: @@ -70,5 +68,6 @@ class CudaStreamPool }; #endif +} // namespace Cuda } // namespace TNL diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index 7831014155e9a730c1be101c47cb2602cd8d3179..2b3bf8c6660b1f4e4c2d7310c24f25ea047beea6 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -10,13 +10,7 @@ #pragma once -#include <iostream> - #include <TNL/String.h> -#include <TNL/Assert.h> -#include <TNL/Pointers/SmartPointersRegister.h> -#include <TNL/Timer.h> -#include <TNL/Devices/CudaCallable.h> 
// Devices::Cuda -- presumably the CUDA device tag used by TNL's
// device-templated code (TODO confirm against callers); here it carries only
// the command-line configuration of which CUDA device to use.
class Cuda
{
public:
   // Registers the "cuda-device" integer option (default 0) in `config`,
   // with a help text noting when CUDA is unavailable on this system.
   static inline void configSetup( Config::ConfigDescription& config, const String& prefix = "" )
   {
#ifdef HAVE_CUDA
      config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 );
#else
      config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 );
#endif
   }

   // Activates the CUDA device selected by the "cuda-device" parameter.
   // Returns false (after printing an error to std::cerr) if the device
   // cannot be activated; always returns true in builds without CUDA.
   static inline bool setup( const Config::ParameterContainer& parameters,
                             const String& prefix = "" )
   {
#ifdef HAVE_CUDA
      int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" );
      if( cudaSetDevice( cudaDevice ) != cudaSuccess )
      {
         std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." << std::endl;
         return false;
      }
#endif
      return true;
   }
};
-/*************************************************************************** - Cuda_impl.h - description - ------------------- - begin : Jan 21, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Math.h> -#include <TNL/Devices/Cuda.h> -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Exceptions/CudaBadAlloc.h> -#include <TNL/Exceptions/CudaSupportMissing.h> -#include <TNL/Exceptions/CudaRuntimeError.h> -#include <TNL/CudaSharedMemory.h> - -namespace TNL { -namespace Devices { - -inline String Cuda::getDeviceType() -{ - return String( "Devices::Cuda" ); -} - -inline void -Cuda::configSetup( Config::ConfigDescription& config, - const String& prefix ) -{ -#ifdef HAVE_CUDA - config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation.", 0 ); -#else - config.addEntry< int >( prefix + "cuda-device", "Choose CUDA device to run the computation (not supported on this system).", 0 ); -#endif -} - -inline bool -Cuda::setup( const Config::ParameterContainer& parameters, - const String& prefix ) -{ -#ifdef HAVE_CUDA - int cudaDevice = parameters.getParameter< int >( prefix + "cuda-device" ); - if( cudaSetDevice( cudaDevice ) != cudaSuccess ) - { - std::cerr << "I cannot activate CUDA device number " << cudaDevice << "." 
<< std::endl; - return false; - } - getSmartPointersSynchronizationTimer().reset(); - getSmartPointersSynchronizationTimer().stop(); -#endif - return true; -} - -__cuda_callable__ -inline constexpr int Cuda::getMaxGridSize() -{ - return 65535; -} - -__cuda_callable__ -inline constexpr int Cuda::getMaxBlockSize() -{ - return 1024; -} - -__cuda_callable__ -inline constexpr int Cuda::getWarpSize() -{ - return 32; -} - -__cuda_callable__ -inline constexpr int Cuda::getNumberOfSharedMemoryBanks() -{ - return 32; -} - -inline constexpr int Cuda::getGPUTransferBufferSize() -{ - return 1 << 20; -} - -#ifdef HAVE_CUDA -__device__ inline int Cuda::getGlobalThreadIdx( const int gridIdx, const int gridSize ) -{ - return ( gridIdx * gridSize + blockIdx.x ) * blockDim.x + threadIdx.x; -} - -__device__ inline int Cuda::getGlobalThreadIdx_x( const dim3& gridIdx ) -{ - return ( gridIdx.x * getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; -} - -__device__ inline int Cuda::getGlobalThreadIdx_y( const dim3& gridIdx ) -{ - return ( gridIdx.y * getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; -} - -__device__ inline int Cuda::getGlobalThreadIdx_z( const dim3& gridIdx ) -{ - return ( gridIdx.z * getMaxGridSize() + blockIdx.z ) * blockDim.z + threadIdx.z; -} -#endif - -inline int Cuda::getNumberOfBlocks( const int threads, - const int blockSize ) -{ - return roundUpDivision( threads, blockSize ); -} - -inline int Cuda::getNumberOfGrids( const int blocks, - const int gridSize ) -{ - return roundUpDivision( blocks, gridSize ); -} - -#ifdef HAVE_CUDA -inline void Cuda::setupThreads( const dim3& blockSize, - dim3& blocksCount, - dim3& gridsCount, - long long int xThreads, - long long int yThreads, - long long int zThreads ) -{ - blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) ); - blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) ); - blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z 
!= 0 ) ); - - /**** - * TODO: Fix the following: - * I do not known how to get max grid size in kernels :( - * - * Also, this is very slow. */ - /*int currentDevice( 0 ); - cudaGetDevice( currentDevice ); - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, currentDevice ); - gridsCount.x = blocksCount.x / properties.maxGridSize[ 0 ] + ( blocksCount.x % properties.maxGridSize[ 0 ] != 0 ); - gridsCount.y = blocksCount.y / properties.maxGridSize[ 1 ] + ( blocksCount.y % properties.maxGridSize[ 1 ] != 0 ); - gridsCount.z = blocksCount.z / properties.maxGridSize[ 2 ] + ( blocksCount.z % properties.maxGridSize[ 2 ] != 0 ); - */ - gridsCount.x = blocksCount.x / getMaxGridSize() + ( blocksCount.x % getMaxGridSize() != 0 ); - gridsCount.y = blocksCount.y / getMaxGridSize() + ( blocksCount.y % getMaxGridSize() != 0 ); - gridsCount.z = blocksCount.z / getMaxGridSize() + ( blocksCount.z % getMaxGridSize() != 0 ); -} - -inline void Cuda::setupGrid( const dim3& blocksCount, - const dim3& gridsCount, - const dim3& gridIdx, - dim3& gridSize ) -{ - /* TODO: this is extremely slow!!!! 
- int currentDevice( 0 ); - cudaGetDevice( ¤tDevice ); - cudaDeviceProp properties; - cudaGetDeviceProperties( &properties, currentDevice );*/ - - /**** - * TODO: fix the following - if( gridIdx.x < gridsCount.x ) - gridSize.x = properties.maxGridSize[ 0 ]; - else - gridSize.x = blocksCount.x % properties.maxGridSize[ 0 ]; - - if( gridIdx.y < gridsCount.y ) - gridSize.y = properties.maxGridSize[ 1 ]; - else - gridSize.y = blocksCount.y % properties.maxGridSize[ 1 ]; - - if( gridIdx.z < gridsCount.z ) - gridSize.z = properties.maxGridSize[ 2 ]; - else - gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/ - - if( gridIdx.x < gridsCount.x - 1 ) - gridSize.x = getMaxGridSize(); - else - gridSize.x = blocksCount.x % getMaxGridSize(); - - if( gridIdx.y < gridsCount.y - 1 ) - gridSize.y = getMaxGridSize(); - else - gridSize.y = blocksCount.y % getMaxGridSize(); - - if( gridIdx.z < gridsCount.z - 1 ) - gridSize.z = getMaxGridSize(); - else - gridSize.z = blocksCount.z % getMaxGridSize(); -} - -inline void Cuda::printThreadsSetup( const dim3& blockSize, - const dim3& blocksCount, - const dim3& gridSize, - const dim3& gridsCount, - std::ostream& str ) -{ - str << "Block size: " << blockSize << std::endl - << " Blocks count: " << blocksCount << std::endl - << " Grid size: " << gridSize << std::endl - << " Grids count: " << gridsCount << std::endl; -} -#endif - - -template< typename ObjectType > -ObjectType* Cuda::passToDevice( const ObjectType& object ) -{ -#ifdef HAVE_CUDA - ObjectType* deviceObject; - if( cudaMalloc( ( void** ) &deviceObject, - ( size_t ) sizeof( ObjectType ) ) != cudaSuccess ) - throw Exceptions::CudaBadAlloc(); - if( cudaMemcpy( ( void* ) deviceObject, - ( void* ) &object, - sizeof( ObjectType ), - cudaMemcpyHostToDevice ) != cudaSuccess ) - { - TNL_CHECK_CUDA_DEVICE; - cudaFree( ( void* ) deviceObject ); - TNL_CHECK_CUDA_DEVICE; - return 0; - } - return deviceObject; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< 
typename ObjectType > -ObjectType Cuda::passFromDevice( const ObjectType* object ) -{ -#ifdef HAVE_CUDA - ObjectType aux; - cudaMemcpy( ( void* ) aux, - ( void* ) &object, - sizeof( ObjectType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; - return aux; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename ObjectType > -void Cuda::passFromDevice( const ObjectType* deviceObject, - ObjectType& hostObject ) -{ -#ifdef HAVE_CUDA - cudaMemcpy( ( void* ) &hostObject, - ( void* ) deviceObject, - sizeof( ObjectType ), - cudaMemcpyDeviceToHost ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename ObjectType > -void Cuda::print( const ObjectType* deviceObject, std::ostream& str ) -{ -#ifdef HAVE_CUDA - ObjectType hostObject; - passFromDevice( deviceObject, hostObject ); - str << hostObject; -#endif -} - - -template< typename ObjectType > -void Cuda::freeFromDevice( ObjectType* deviceObject ) -{ -#ifdef HAVE_CUDA - cudaFree( ( void* ) deviceObject ); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -#ifdef HAVE_CUDA -template< typename Index > -__device__ Index Cuda::getInterleaving( const Index index ) -{ - return index + index / Cuda::getNumberOfSharedMemoryBanks(); -} - -template< typename Element > -__device__ Element* Cuda::getSharedMemory() -{ - return CudaSharedMemory< Element >(); -} -#endif - -#ifdef HAVE_CUDA -inline void Cuda::checkDevice( const char* file_name, int line, cudaError error ) -{ - if( error != cudaSuccess ) - throw Exceptions::CudaRuntimeError( error, file_name, line ); -} -#endif - -inline void Cuda::insertSmartPointer( Pointers::SmartPointer* pointer ) -{ - getSmartPointersRegister().insert( pointer, Devices::CudaDeviceInfo::getActiveDevice() ); -} - -inline void Cuda::removeSmartPointer( Pointers::SmartPointer* pointer ) -{ - getSmartPointersRegister().remove( pointer, Devices::CudaDeviceInfo::getActiveDevice() ); -} - 
-inline bool Cuda::synchronizeDevice( int deviceId ) -{ -#ifdef HAVE_CUDA -#ifdef HAVE_CUDA_UNIFIED_MEMORY - return true; -#else - if( deviceId < 0 ) - deviceId = Devices::CudaDeviceInfo::getActiveDevice(); - getSmartPointersSynchronizationTimer().start(); - bool b = getSmartPointersRegister().synchronizeDevice( deviceId ); - getSmartPointersSynchronizationTimer().stop(); - return b; -#endif -#else - return true; -#endif -} - -inline Timer& Cuda::getSmartPointersSynchronizationTimer() -{ - static Timer timer; - return timer; -} - -inline Pointers::SmartPointersRegister& Cuda::getSmartPointersRegister() -{ - static Pointers::SmartPointersRegister reg; - return reg; -} - -#ifdef HAVE_CUDA -namespace { - std::ostream& operator << ( std::ostream& str, const dim3& d ) - { - str << "( " << d.x << ", " << d.y << ", " << d.z << " )"; - return str; - } -} -#endif - -// double-precision atomicAdd function for Maxwell and older GPUs -// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions -#ifdef HAVE_CUDA -#if __CUDA_ARCH__ < 600 -namespace { - __device__ double atomicAdd(double* address, double val) - { - unsigned long long int* address_as_ull = - (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + - __longlong_as_double(assumed))); - - // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) - } while (assumed != old); - - return __longlong_as_double(old); - } -} // namespace -#endif -#endif - -} // namespace Devices -} // namespace TNL diff --git a/src/TNL/Devices/Host.h b/src/TNL/Devices/Host.h index 40f55711a817e684f379e442ab30cdec485be013..4af7892ecc2c6c8fe96ba936afe29a99cb023a0b 100644 --- a/src/TNL/Devices/Host.h +++ b/src/TNL/Devices/Host.h @@ -19,20 +19,11 @@ #endif namespace TNL { -//! 
\brief Namespace for TNL execution models namespace Devices { class Host { public: - static String getDeviceType() - { - return String( "Devices::Host" ); - } - - // TODO: Remove getDeviceType(); - static inline String getType() { return getDeviceType();}; - static void disableOMP() { ompEnabled() = false; diff --git a/src/TNL/Devices/MIC.h b/src/TNL/Devices/MIC.h deleted file mode 100644 index f347a24d1f9e4fa6d5cceb7e2693807c7158065a..0000000000000000000000000000000000000000 --- a/src/TNL/Devices/MIC.h +++ /dev/null @@ -1,170 +0,0 @@ -/*************************************************************************** - MIC.h - description - ------------------- - begin : Nov 7, 2016 - copyright : (C) 2016 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Vit Hanousek - -#pragma once - -#include <iostream> -#include <cstring> -#include <unistd.h> -#include <TNL/String.h> -#include <TNL/Assert.h> -#include <TNL/Pointers/SmartPointersRegister.h> -#include <TNL/Timer.h> - -#include <TNL/Devices/CudaCallable.h> - - -namespace TNL { -namespace Devices { -namespace { - -//useful macros from Intel's tutorials -- but we do not use it, becaouse it is tricky (system of maping variables CPU-MIC) -#define ALLOC alloc_if(1) //alloac variable at begining of offloaded block -- default -#define FREE free_if(1) // delete variable at the end of offloaded block -- default -#define RETAIN free_if(0) //do not delete variable at the end of offladed block -#define REUSE alloc_if(0) //do not alloc variable at begin of offloaded block, reuse variable on MIC which was not deleted befeore - -//structure which hides pointer - bypass mapping of variables and addresses of arrays and allow get RAW addres of MIC memory to RAM -template< typename Type > -struct MICHider{ - Type *pointer; -}; - -//inflatable structure -- structures can be copied to MIC 
- classes not (viz paper published after CSJP 2016 in Krakow) -//object can be copied in side this structure and then copied into MIC memory -template <unsigned int VELIKOST> -struct MICStruct{ - uint8_t data[VELIKOST]; -}; - -//Macros which can make code better readeble --but they are tricky, creating variables with specific names... -//version using inflatable structure -#define TNLMICSTRUCT(bb,typ) Devices::MICStruct<sizeof(typ)> s ## bb; \ - memcpy((void*)& s ## bb,(void*)& bb,sizeof(typ)); -#define TNLMICSTRUCTOFF(bb,typ) s ## bb -#define TNLMICSTRUCTUSE(bb,typ) typ * kernel ## bb = (typ*) &s ## bb; -#define TNLMICSTRUCTALLOC(bb,typ) typ * kernel ## bb = (typ*) malloc (sizeof(typ)); \ - memcpy((void*)kernel ## bb,(void*) & s ## bb, sizeof(typ)); - -//version which retypes pointer of object to pointer to array of uint8_t, -//object can be copied using uint8_t pointer as array with same length as object size -#define TNLMICHIDE(bb,typ) uint8_t * u ## bb=(uint8_t *)&bb; \ - MICHider<typ> kernel ## bb; -#define TNLMICHIDEALLOCOFF(bb,typ) in(u ## bb:length(sizeof(typ))) out(kernel ## bb) -#define TNLMICHIDEALLOC(bb,typ) kernel ## bb.pointer=(typ*)malloc(sizeof(typ)); \ - memcpy((void*)kernel ## bb.pointer,(void*)u ## bb,sizeof(typ)); -#define TNLMICHIDEFREEOFF(bb,typ) in(kernel ## bb) -#define TNLMICHIDEFREE(bb,typ) free((void*)kernel ## bb.pointer - -class MIC -{ - public: - - static String getDeviceType() - { - return String( "Devices::MIC" ); - }; - - // TODO: Remove getDeviceType(); - static inline String getType() { return getDeviceType(); }; - -#ifdef HAVE_MIC - - //useful debuging -- but produce warning - __cuda_callable__ static inline void CheckMIC(void) - { - #ifdef __MIC__ - std::cout<<"ON MIC"<<std::endl; - #else - std::cout<<"ON CPU" <<std::endl; - #endif - }; - - - //old copying funciton -- deprecated - template <typename TYP> - static - TYP * passToDevice(TYP &objektCPU) - { - uint8_t * uk=(uint8_t *)&objektCPU; - MICHider<TYP> ret; - - #pragma 
offload target(mic) in(uk:length(sizeof(TYP))) out(ret) - { - ret.pointer=(TYP*)malloc(sizeof(TYP)); - std::memcpy((void*)ret.pointer,(void*)uk,sizeof(TYP)); - } - return ret.pointer; - - std::cout << "NÄ›kdo mnÄ› volá :-D" <<std::endl; - }; - - //old cleaning function -- deprecated - template <typename TYP> - static - void freeFromDevice(TYP *objektMIC) - { - MICHider<TYP> ptr; - ptr.pointer=objektMIC; - #pragma offload target(mic) in(ptr) - { - free((void*)ptr.pointer); - } - }; - - static inline - void CopyToMIC(void* mic_ptr,void* ptr,size_t size) - { - uint8_t image[size]; - std::memcpy((void*)&image,ptr,size); - Devices::MICHider<void> hide_ptr; - hide_ptr.pointer=mic_ptr; - #pragma offload target(mic) in(hide_ptr) in(image) in(size) - { - std::memcpy((void*)hide_ptr.pointer,(void*)&image,size); - } - }; - -#endif - - static void insertSmartPointer( Pointers::SmartPointer* pointer ) - { - smartPointersRegister.insert( pointer, -1 ); - } - - static void removeSmartPointer( Pointers::SmartPointer* pointer ) - { - smartPointersRegister.remove( pointer, -1 ); - } - - // Negative deviceId means that CudaDeviceInfo::getActiveDevice will be - // called to get the device ID. 
- static bool synchronizeDevice( int deviceId = -1 ) - { - smartPointersSynchronizationTimer.start(); - bool b = smartPointersRegister.synchronizeDevice( deviceId ); - smartPointersSynchronizationTimer.stop(); - return b; - } - - static Timer smartPointersSynchronizationTimer; - -protected: - static Pointers::SmartPointersRegister smartPointersRegister; -}; - -Pointers::SmartPointersRegister MIC::smartPointersRegister; -Timer MIC::smartPointersSynchronizationTimer; - -} // namespace <unnamed> -} // namespace Devices -} // namespace TNL diff --git a/src/TNL/Devices/Sequential.h b/src/TNL/Devices/Sequential.h new file mode 100644 index 0000000000000000000000000000000000000000..f00660f1961e5534db2bab2568a2a98e4fe7a622 --- /dev/null +++ b/src/TNL/Devices/Sequential.h @@ -0,0 +1,21 @@ +/*************************************************************************** + Sequential.h - description + ------------------- + begin : Aug 17, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +//! \brief Namespace for TNL execution models +namespace Devices { + +struct Sequential +{}; + +} // namespace Devices +} // namespace TNL diff --git a/src/TNL/Exceptions/MICBadAlloc.h b/src/TNL/Exceptions/MICBadAlloc.h deleted file mode 100644 index b8f3a9157c54d8155652a42a700ad71a221aa201..0000000000000000000000000000000000000000 --- a/src/TNL/Exceptions/MICBadAlloc.h +++ /dev/null @@ -1,31 +0,0 @@ -/*************************************************************************** - MICBadAlloc.h - description - ------------------- - begin : Jul 31, 2017 - copyright : (C) 2017 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include <new> - -namespace TNL { -namespace Exceptions { - -struct MICBadAlloc - : public std::bad_alloc -{ - const char* what() const throw() - { - return "Failed to allocate memory on the MIC device: " - "most likely there is not enough space on the device memory."; - } -}; - -} // namespace Exceptions -} // namespace TNL diff --git a/src/TNL/Exceptions/MICSupportMissing.h b/src/TNL/Exceptions/MICSupportMissing.h deleted file mode 100644 index 6d4260e6addbbb9dd89a7c9d5a07833485c6a0c2..0000000000000000000000000000000000000000 --- a/src/TNL/Exceptions/MICSupportMissing.h +++ /dev/null @@ -1,30 +0,0 @@ -/*************************************************************************** - MICSupportMissing.h - description - ------------------- - begin : Jul 31, 2017 - copyright : (C) 2017 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -#include <stdexcept> - -namespace TNL { -namespace Exceptions { - -struct MICSupportMissing - : public std::runtime_error -{ - MICSupportMissing() - : std::runtime_error( "MIC support is missing, but the program called a function which needs it. " - "Please recompile the program with MIC support." 
) - {} -}; - -} // namespace Exceptions -} // namespace TNL diff --git a/src/TNL/Experimental/Arithmetics/Quad.h b/src/TNL/Experimental/Arithmetics/Quad.h index 13d9c823155ad5fee670f61eb89486d56a08a7df..3c1dd073f54ccf8b258e64a73819b11a15fe3202 100644 --- a/src/TNL/Experimental/Arithmetics/Quad.h +++ b/src/TNL/Experimental/Arithmetics/Quad.h @@ -33,8 +33,6 @@ public: explicit Quad(const T&); explicit Quad(int); Quad(const Quad<T>&); - - static String getType(); /*OVERLOADED OPERATORS*/ T& operator[](int); diff --git a/src/TNL/Experimental/Arithmetics/Quad_impl.h b/src/TNL/Experimental/Arithmetics/Quad_impl.h index 63c08a40180d420d069bd4cb640ea7137ff1cb22..3a2ecb245061a28af50f552a8cb20790db89eba0 100644 --- a/src/TNL/Experimental/Arithmetics/Quad_impl.h +++ b/src/TNL/Experimental/Arithmetics/Quad_impl.h @@ -56,14 +56,6 @@ Quad<T>::Quad(const Quad<T>& other) { data[3] = other[3]; } -template <class T> -String -Quad< T >:: -getType() -{ - return String( "Quad< " + getType< T >() + " >" ); -} - template <class T> T& Quad<T>::operator [](int idx) { return data[idx]; diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h index 55129c4e1008e69ef3b3d238acb8fc587cce4076..49cda643cc73a6717f47d877315b980e5589d60c 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase1D_impl.h @@ -22,10 +22,10 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); - 
Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >() ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h index cddf4f9cb7a97f8a74eb94f7377b2cf740db03a5..b18252cb078b803d6e911ca130b9b161c241ff4d 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase2D_impl.h @@ -25,11 +25,11 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >(), diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h index 32548abcfe66affd71a79d3ed5f2f21d67df644e..fd7dc9381ec1325f108078271df343953be58ebd 100644 --- 
a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlDirectEikonalMethodBase3D_impl.h @@ -23,14 +23,14 @@ initInterface( const MeshFunctionPointer& _input, const MeshType& mesh = _input->getMesh(); const int cudaBlockSize( 8 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh.getDimensions().x(), cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh.getDimensions().y(), cudaBlockSize ); + int numBlocksZ = Cuda::getNumberOfBlocks( mesh.getDimensions().z(), cudaBlockSize ); if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" 
<< std::endl; dim3 blockSize( cudaBlockSize, cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY, numBlocksZ ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaInitCaller3d<<< gridSize, blockSize >>>( _input.template getData< Device >(), _output.template modifyData< Device >(), _interfaceMap.template modifyData< Device >(), vLower, vUpper ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h index f2f033ccbee3ffa5b71567ea1b54e2307ebe1713..52c2ebbee3f16fd39a2dfab738ac9bea6ffaf393 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod1D_impl.h @@ -105,7 +105,7 @@ solve( const MeshPointer& mesh, // TODO: CUDA code #ifdef HAVE_CUDA const int cudaBlockSize( 16 ); - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x(), cudaBlockSize ); dim3 blockSize( cudaBlockSize ); dim3 gridSize( numBlocksX ); diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h index e5638c11dd71d88d72d8d0590c8e51c0df6baaab..1b1666a02b627778bd8364a7e46f5e22718ff76f 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod2D_impl.h @@ -251,8 +251,8 @@ solve( const MeshPointer& mesh, const int cudaBlockSize( 16 ); // Setting number of threads and blocks for kernel - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - 
vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); dim3 blockSize( cudaBlockSize, cudaBlockSize ); dim3 gridSize( numBlocksX, numBlocksY ); @@ -316,7 +316,7 @@ solve( const MeshPointer& mesh, /** HERE IS FIM FOR MPI AND WITHOUT MPI **/ - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CudaUpdateCellCaller<18><<< gridSize, blockSize >>>( ptr, interfaceMapPtr.template getData< Device >(), auxPtr.template getData< Device>(), helpFunc.template modifyData< Device>(), blockCalculationIndicator.getView(), vecLowerOverlaps, vecUpperOverlaps ); @@ -327,7 +327,7 @@ solve( const MeshPointer& mesh, auxPtr.swap( helpFunc ); // Getting blocks that should calculate in next passage. These blocks are neighbours of those that were calculated now. 
- Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); GetNeighbours<<< nBlocksNeigh, 1024 >>>( blockCalculationIndicator.getView(), blockCalculationIndicatorHelp.getView(), numBlocksX, numBlocksY ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; @@ -349,7 +349,7 @@ solve( const MeshPointer& mesh, if( numIter%2 == 1 ) // Need to check parity for MPI overlaps to synchronize ( otherwise doesnt work ) { helpFunc.swap( auxPtr ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; } diff --git a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h index 325b626f7bf5262637f8e1b43ec9e156bbeca26b..82185a937d832b0785b597188aeb0989ab751d47 100644 --- a/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h +++ b/src/TNL/Experimental/Hamilton-Jacobi/Solvers/hamilton-jacobi/tnlFastSweepingMethod3D_impl.h @@ -263,9 +263,9 @@ solve( const MeshPointer& mesh, const int cudaBlockSize( 8 ); // Getting the number of blocks in grid in each direction (without overlaps bcs we dont calculate on overlaps) - int numBlocksX = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); - int numBlocksY = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize ); - int numBlocksZ = Devices::Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); + int numBlocksX = Cuda::getNumberOfBlocks( mesh->getDimensions().x() - vecLowerOverlaps[0] - vecUpperOverlaps[0], cudaBlockSize ); + int numBlocksY = Cuda::getNumberOfBlocks( mesh->getDimensions().y() - vecLowerOverlaps[1] - vecUpperOverlaps[1], cudaBlockSize 
); + int numBlocksZ = Cuda::getNumberOfBlocks( mesh->getDimensions().z() - vecLowerOverlaps[2] - vecUpperOverlaps[2], cudaBlockSize ); if( cudaBlockSize * cudaBlockSize * cudaBlockSize > 1024 || numBlocksX > 1024 || numBlocksY > 1024 || numBlocksZ > 64 ) std::cout << "Invalid kernel call. Dimensions of grid are max: [1024,1024,64], and maximum threads per block are 1024!" << std::endl; @@ -295,14 +295,14 @@ solve( const MeshPointer& mesh, //MeshFunctionPointer helpFunc1( mesh ); MeshFunctionPointer helpFunc( mesh ); helpFunc.template modifyData() = auxPtr.template getData(); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int numIter = 0; // number of passages of following while cycle while( BlockIterD ) //main body of cuda code { - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // main function that calculates all values in each blocks // calculated values are in helpFunc CudaUpdateCellCaller< 10 ><<< gridSize, blockSize >>>( ptr, @@ -315,14 +315,14 @@ solve( const MeshPointer& mesh, // Switching pointers to helpFunc and auxPtr so real results are in memory of helpFunc but here under variable auxPtr auxPtr.swap( helpFunc ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // Neighbours of blocks that calculatedBefore in this passage should calculate in the next! 
// BlockIterDevice contains blocks that calculatedBefore in this passage and BlockIterPom those that should calculate in next (are neighbours) GetNeighbours<<< nBlocksNeigh, 1024 >>>( BlockIterDevice.getView(), BlockIterPom.getView(), numBlocksX, numBlocksY, numBlocksZ ); cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; BlockIterDevice = BlockIterPom; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); // .containsValue(1) is actually parallel reduction implemented in TNL BlockIterD = BlockIterDevice.containsValue(1); @@ -340,7 +340,7 @@ solve( const MeshPointer& mesh, // We need auxPtr to point on memory of original auxPtr (not to helpFunc) // last passage of previous while cycle didnt calculate any number anyway so switching names doesnt effect values auxPtr.swap( helpFunc ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); } cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; diff --git a/src/TNL/File.h b/src/TNL/File.h index 1aa5615e5cbbf8f36d2c9ac3d98bdbc3ba4ada03..cef110e1633537ecb5b13770cc805dccb1e2786f 100644 --- a/src/TNL/File.h +++ b/src/TNL/File.h @@ -14,9 +14,8 @@ #include <type_traits> #include <TNL/String.h> -#include <TNL/Devices/Host.h> -#include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> +#include <TNL/Allocators/Host.h> +#include <TNL/Allocators/Cuda.h> namespace TNL { @@ -86,9 +85,9 @@ class File /** * \brief Method for loading data from the file. * - * The data will be stored in \e buffer allocated on device given by the - * \e Device parameter. The data type of the buffer is given by the - * template parameter \e Type. The second template parameter + * The data will be stored in \e buffer which was allocated using the + * allocator of type \e Allocator. The data type of the buffer is given + * by the template parameter \e Type. The second template parameter * \e SourceType defines the type of data in the source file. 
If both * types are different, on-the-fly conversion takes place during the * data loading. @@ -97,31 +96,31 @@ class File * * \tparam Type type of data to be loaded to the \e buffer. * \tparam SourceType type of data stored on the file, - * \tparam Device device where the data are stored after reading. For example \ref Devices::Host or \ref Devices::Cuda. + * \tparam Allocator type of the allocator which was used to allocate \e buffer. * \param buffer Pointer in memory where the elements are loaded and stored after reading. * \param elements number of elements to be loaded from the file. - * + * * The following example shows how to load data directly to GPU. - * + * * \par Example * \include FileExampleCuda.cpp * \par Output * \include FileExampleCuda.out * The following example shows how to do on-the-fly data conversion. - * + * * \par Example * \include FileExampleSaveAndLoad.cpp * \par Output * \include FileExampleSaveAndLoad.out */ - template< typename Type, typename SourceType = Type, typename Device = Devices::Host > + template< typename Type, typename SourceType = Type, typename Allocator = Allocators::Host< Type > > void load( Type* buffer, std::streamsize elements = 1 ); /** * \brief Method for saving data to the file. * - * The data from the \e buffer (with type \e Type) allocated on the device - * \e Device will be saved into the file. \e TargetType defines as what + * The data from the \e buffer (with type \e Type) which was allocated + * using an allocator of type \e Allocator. \e TargetType defines as what * data type the buffer shall be saved. If the type is different from the * data type, on-the-fly data type conversion takes place during the data * saving. @@ -130,69 +129,49 @@ class File * * \tparam Type type of data in the \e buffer. * \tparam TargetType tells as what type data the buffer shall be saved. - * \tparam Device device from where the data are loaded before writing into file. For example \ref Devices::Host or \ref Devices::Cuda. 
+ * \tparam Allocator type of the allocator which was used to allocate \e buffer. * \tparam Index type of index by which the elements are indexed. * \param buffer buffer that is going to be saved to the file. * \param elements number of elements saved to the file. * * See \ref File::load for examples. */ - template< typename Type, typename TargetType = Type, typename Device = Devices::Host > + template< typename Type, typename TargetType = Type, typename Allocator = Allocators::Host< Type > > void save( const Type* buffer, std::streamsize elements = 1 ); protected: + // implementation for all allocators which allocate data accessible from host template< typename Type, typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > + typename Allocator, + typename = std::enable_if_t< ! std::is_same< Allocator, Allocators::Cuda< Type > >::value > > void load_impl( Type* buffer, std::streamsize elements ); + // implementation for \ref Allocators::Cuda template< typename Type, typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, + typename Allocator, + typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >, typename = void > void load_impl( Type* buffer, std::streamsize elements ); - template< typename Type, - typename SourceType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, - typename = void, - typename = void > - void load_impl( Type* buffer, std::streamsize elements ); - - template< typename Type, - typename TargetType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Host >::value >::type > - void save_impl( const Type* buffer, std::streamsize elements ); - + // implementation for all allocators which allocate data accessible from host template< typename Type, typename TargetType, - 
typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::Cuda >::value >::type, - typename = void > + typename Allocator, + typename = std::enable_if_t< ! std::is_same< Allocator, Allocators::Cuda< Type > >::value > > void save_impl( const Type* buffer, std::streamsize elements ); + // implementation for \ref Allocators::Cuda template< typename Type, typename TargetType, - typename Device, - typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type, - typename = void, + typename Allocator, + typename = std::enable_if_t< std::is_same< Allocator, Allocators::Cuda< Type > >::value >, typename = void > void save_impl( const Type* buffer, std::streamsize elements ); std::fstream file; String fileName; - - //// - // When we transfer data between the GPU and the CPU we use 5 MB buffer. This - // size should ensure good performance -- see. - // http://wiki.accelereyes.com/wiki/index.php/GPU_Memory_Transfer . - // We use the same buffer size even for retyping data during IO operations. 
- // - static constexpr std::streamsize TransferBufferSize = 5 * 2<<20; }; /** diff --git a/src/TNL/File.hpp b/src/TNL/File.hpp index f4edd2b9638e0331da973bdf16de06568d4b7c23..af112e992a7640070ab880192688b3a0aac8f1d2 100644 --- a/src/TNL/File.hpp +++ b/src/TNL/File.hpp @@ -17,8 +17,9 @@ #include <TNL/File.h> #include <TNL/Assert.h> +#include <TNL/Cuda/CheckDevice.h> +#include <TNL/Cuda/LaunchHelpers.h> #include <TNL/Exceptions/CudaSupportMissing.h> -#include <TNL/Exceptions/MICSupportMissing.h> #include <TNL/Exceptions/FileSerializationError.h> #include <TNL/Exceptions/FileDeserializationError.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -79,21 +80,23 @@ inline void File::close() template< typename Type, typename SourceType, - typename Device > + typename Allocator > void File::load( Type* buffer, std::streamsize elements ) { + static_assert( std::is_same< Type, typename Allocator::value_type >::value, + "Allocator::value_type must be the same as Type." ); TNL_ASSERT_GE( elements, 0, "Number of elements to load must be non-negative." ); if( ! 
elements ) return; - load_impl< Type, SourceType, Device >( buffer, elements ); + load_impl< Type, SourceType, Allocator >( buffer, elements ); } -// Host +// Host allocators template< typename Type, typename SourceType, - typename Device, + typename Allocator, typename > void File::load_impl( Type* buffer, std::streamsize elements ) { @@ -101,7 +104,7 @@ void File::load_impl( Type* buffer, std::streamsize elements ) file.read( reinterpret_cast<char*>(buffer), sizeof(Type) * elements ); else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements ); using BaseType = typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize readElements = 0; @@ -116,15 +119,15 @@ void File::load_impl( Type* buffer, std::streamsize elements ) } } -// Cuda +// Allocators::Cuda template< typename Type, typename SourceType, - typename Device, + typename Allocator, typename, typename > void File::load_impl( Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -145,7 +148,7 @@ void File::load_impl( Type* buffer, std::streamsize elements ) } else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(SourceType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(SourceType), elements ); using BaseType = 
typename std::remove_cv< SourceType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; @@ -168,65 +171,25 @@ void File::load_impl( Type* buffer, std::streamsize elements ) #endif } -// MIC -template< typename Type, - typename SourceType, - typename Device, - typename, typename, typename > -void File::load_impl( Type* buffer, std::streamsize elements ) -{ -#ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); - using BaseType = typename std::remove_cv< Type >::type; - std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; - - std::streamsize readElements = 0; - if( std::is_same< Type, SourceType >::value ) - { - while( readElements < elements ) - { - const std::streamsize transfer = std::min( elements - readElements, host_buffer_size ); - file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer ); - - Devices::MICHider<Type> device_buff; - device_buff.pointer=buffer; - #pragma offload target(mic) in(device_buff,readElements) in(host_buffer:length(transfer)) - { - /* - for(int i=0;i<transfer;i++) - device_buff.pointer[readElements+i]=host_buffer[i]; - */ - memcpy(&(device_buff.pointer[readElements]), host_buffer.get(), transfer*sizeof(Type) ); - } - - readElements += transfer; - } - free( host_buffer ); - } - else - throw Exceptions::NotImplementedError("Type conversion during loading is not implemented for MIC."); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - template< typename Type, typename TargetType, - typename Device > + typename Allocator > void File::save( const Type* buffer, std::streamsize elements ) { + static_assert( std::is_same< Type, typename Allocator::value_type >::value, + "Allocator::value_type must be the same as Type." ); TNL_ASSERT_GE( elements, 0, "Number of elements to save must be non-negative." ); if( ! 
elements ) return; - save_impl< Type, TargetType, Device >( buffer, elements ); + save_impl< Type, TargetType, Allocator >( buffer, elements ); } -// Host +// Host allocators template< typename Type, typename TargetType, - typename Device, + typename Allocator, typename > void File::save_impl( const Type* buffer, std::streamsize elements ) { @@ -234,7 +197,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) file.write( reinterpret_cast<const char*>(buffer), sizeof(Type) * elements ); else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(TargetType), elements ); using BaseType = typename std::remove_cv< TargetType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; std::streamsize writtenElements = 0; @@ -250,15 +213,15 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) } } -// Cuda +// Allocators::Cuda template< typename Type, typename TargetType, - typename Device, + typename Allocator, typename, typename > void File::save_impl( const Type* buffer, std::streamsize elements ) { #ifdef HAVE_CUDA - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); + const std::streamsize host_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) sizeof(Type), elements ); using BaseType = typename std::remove_cv< Type >::type; std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; @@ -279,7 +242,7 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) } else { - const std::streamsize cast_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(TargetType), elements ); + const std::streamsize cast_buffer_size = std::min( Cuda::getTransferBufferSize() / (std::streamsize) 
sizeof(TargetType), elements ); using BaseType = typename std::remove_cv< TargetType >::type; std::unique_ptr< BaseType[] > cast_buffer{ new BaseType[ cast_buffer_size ] }; @@ -303,48 +266,6 @@ void File::save_impl( const Type* buffer, std::streamsize elements ) #endif } -// MIC -template< typename Type, - typename TargetType, - typename Device, - typename, typename, typename > -void File::save_impl( const Type* buffer, std::streamsize elements ) -{ -#ifdef HAVE_MIC - const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements ); - using BaseType = typename std::remove_cv< Type >::type; - std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; - - std::streamsize writtenElements = 0; - if( std::is_same< Type, TargetType >::value ) - { - while( this->writtenElements < elements ) - { - const std::streamsize transfer = std::min( elements - writtenElements, host_buffer_size ); - - Devices::MICHider<const Type> device_buff; - device_buff.pointer=buffer; - #pragma offload target(mic) in(device_buff,writtenElements) out(host_buffer:length(transfer)) - { - //THIS SHOULD WORK... BUT NOT WHY? 
- /*for(int i=0;i<transfer;i++) - host_buffer[i]=device_buff.pointer[writtenElements+i]; - */ - - memcpy(host_buffer.get(), &(device_buff.pointer[writtenElements]), transfer*sizeof(Type) ); - } - - file.write( reinterpret_cast<const char*>(host_buffer.get()), sizeof(Type) * transfer ); - writtenElements += transfer; - } - } - else - throw Exceptions::NotImplementedError("Type conversion during saving is not implemented for MIC."); -#else - throw Exceptions::MICSupportMissing(); -#endif -} - inline bool fileExists( const String& fileName ) { std::fstream file; diff --git a/src/TNL/Functions/Analytic/Blob.h b/src/TNL/Functions/Analytic/Blob.h index e12a27393c7077f71fe57137a6fce3a2abc00d0c..5a95257cdf0e0a78d071ca6b078c8f9ec2d687ed 100644 --- a/src/TNL/Functions/Analytic/Blob.h +++ b/src/TNL/Functions/Analytic/Blob.h @@ -50,8 +50,6 @@ class Blob< 1, Real > : public BlobBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, @@ -75,8 +73,6 @@ class Blob< 2, Real > : public BlobBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, @@ -101,8 +97,6 @@ class Blob< 3, Real > : public BlobBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Blob(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Blob_impl.h b/src/TNL/Functions/Analytic/Blob_impl.h index f615a10dd7826decd3ec2432e29e96a54371824e..f5195f758e979441a81e7dcfbcf2431705a6f8bd 100644 --- a/src/TNL/Functions/Analytic/Blob_impl.h +++ b/src/TNL/Functions/Analytic/Blob_impl.h @@ -32,13 +32,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -Blob< 1, Real >::getType() -{ - return "Functions::Analytic::Blob< 1, " + TNL::getType< Real >() + String( " >" 
); -} - template< typename Real > Blob< 1, Real >::Blob() { @@ -75,13 +68,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -Blob< 2, Real >::getType() -{ - return String( "Functions::Analytic::Blob< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Blob< 2, Real >::Blob() { @@ -119,13 +105,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -Blob< 3, Real >::getType() -{ - return String( "Functions::Analytic::Blob< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Blob< 3, Real >::Blob() { diff --git a/src/TNL/Functions/Analytic/Cylinder.h b/src/TNL/Functions/Analytic/Cylinder.h index fb3f0542ceda6b3c1b334c300419549d5217bf2b..8b8ab198319a413d9cf95c9e92d1dcb23bc777c6 100644 --- a/src/TNL/Functions/Analytic/Cylinder.h +++ b/src/TNL/Functions/Analytic/Cylinder.h @@ -54,8 +54,6 @@ class Cylinder< 1, Real > : public CylinderBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, @@ -81,8 +79,6 @@ class Cylinder< 2, Real > : public CylinderBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, @@ -108,8 +104,6 @@ class Cylinder< 3, Real > : public CylinderBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Cylinder(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Cylinder_impl.h b/src/TNL/Functions/Analytic/Cylinder_impl.h index b0698bca32056610195f5a9ab23c3e603455e1e5..b76286580c8c13a13c5ffa56fa29ef7943da1fe0 100644 --- a/src/TNL/Functions/Analytic/Cylinder_impl.h +++ b/src/TNL/Functions/Analytic/Cylinder_impl.h @@ -47,13 +47,6 @@ const Real& CylinderBase< Real, Dimension >::getDiameter() const * 1D */ -template< typename Real 
> -String -Cylinder< 1, Real >::getType() -{ - return "Functions::Analytic::Cylinder< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Cylinder< 1, Real >::Cylinder() { @@ -91,13 +84,6 @@ operator()( const PointType& v, * 2D */ -template< typename Real > -String -Cylinder< 2, Real >::getType() -{ - return String( "Functions::Analytic::Cylinder< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Cylinder< 2, Real >::Cylinder() { @@ -137,14 +123,6 @@ operator()( const PointType& v, /**** * 3D */ - -template< typename Real > -String -Cylinder< 3, Real >::getType() -{ - return String( "Functions::Analytic::Cylinder< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Cylinder< 3, Real >::Cylinder() { diff --git a/src/TNL/Functions/Analytic/ExpBump.h b/src/TNL/Functions/Analytic/ExpBump.h index 36b07c9a27d549532fb055c01c075cf5e30aa8a8..48fc613d9b40278140d08ffd0285097dbd7fd326 100644 --- a/src/TNL/Functions/Analytic/ExpBump.h +++ b/src/TNL/Functions/Analytic/ExpBump.h @@ -58,8 +58,6 @@ class ExpBump< 1, Real > : public ExpBumpBase< 1, Real > typedef Real RealType; typedef Containers::StaticVector< 1, RealType > PointType; - static String getType(); - ExpBump(); template< int XDiffOrder = 0, @@ -82,8 +80,6 @@ class ExpBump< 2, Real > : public ExpBumpBase< 2, Real > typedef Real RealType; typedef Containers::StaticVector< 2, RealType > PointType; - static String getType(); - ExpBump(); template< int XDiffOrder = 0, @@ -106,9 +102,6 @@ class ExpBump< 3, Real > : public ExpBumpBase< 3, Real > typedef Real RealType; typedef Containers::StaticVector< 3, RealType > PointType; - - static String getType(); - ExpBump(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/ExpBump_impl.h b/src/TNL/Functions/Analytic/ExpBump_impl.h index 54ecbe2a66fb011e827c1c876aa385ad4b4eee57..6c1103f02ae918cdf35563ccc86480d045d23598 100644 --- a/src/TNL/Functions/Analytic/ExpBump_impl.h +++ 
b/src/TNL/Functions/Analytic/ExpBump_impl.h @@ -63,13 +63,6 @@ const Real& ExpBumpBase< dimensions, Real >::getSigma() const * 1D */ -template< typename Real > -String -ExpBump< 1, Real >::getType() -{ - return "Functions::Analytic::ExpBump< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > ExpBump< 1, Real >::ExpBump() { @@ -113,13 +106,6 @@ operator()( const PointType& v, * 2D */ -template< typename Real > -String -ExpBump< 2, Real >::getType() -{ - return String( "Functions::Analytic::ExpBump< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > ExpBump< 2, Real >::ExpBump() { @@ -168,13 +154,6 @@ operator()( const PointType& v, * 3D */ -template< typename Real > -String -ExpBump< 3, Real >::getType() -{ - return String( "Functions::Analytic::ExpBump< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > ExpBump< 3, Real >::ExpBump() { diff --git a/src/TNL/Functions/Analytic/Flowerpot.h b/src/TNL/Functions/Analytic/Flowerpot.h index 5a42c5f94249aa69c320959a9713f13f88beec56..f33d32b1899fc9df026e10cd34a841e8b3d32023 100644 --- a/src/TNL/Functions/Analytic/Flowerpot.h +++ b/src/TNL/Functions/Analytic/Flowerpot.h @@ -54,8 +54,6 @@ class Flowerpot< 1, Real > : public FlowerpotBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, @@ -81,8 +79,6 @@ class Flowerpot< 2, Real > : public FlowerpotBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, @@ -108,8 +104,6 @@ class Flowerpot< 3, Real > : public FlowerpotBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Flowerpot(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Flowerpot_impl.h 
b/src/TNL/Functions/Analytic/Flowerpot_impl.h index 455b4682b29780bdf526b45adf9b228c7c225073..6769a794b2e7882706d3c7f1f2c3ed11ba655a83 100644 --- a/src/TNL/Functions/Analytic/Flowerpot_impl.h +++ b/src/TNL/Functions/Analytic/Flowerpot_impl.h @@ -45,13 +45,6 @@ const Real& FlowerpotBase< Real, Dimension >::getDiameter() const * 1D */ -template< typename Real > -String -Flowerpot< 1, Real >::getType() -{ - return "Functions::Analytic::Flowerpot< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Flowerpot< 1, Real >::Flowerpot() { @@ -89,13 +82,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -Flowerpot< 2, Real >::getType() -{ - return String( "Functions::Analytic::Flowerpot< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Flowerpot< 2, Real >::Flowerpot() { @@ -136,13 +122,6 @@ operator()( const PointType& v, * 3D */ -template< typename Real > -String -Flowerpot< 3, Real >::getType() -{ - return String( "Functions::Analytic::Flowerpot< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Flowerpot< 3, Real >::Flowerpot() { diff --git a/src/TNL/Functions/Analytic/PseudoSquare.h b/src/TNL/Functions/Analytic/PseudoSquare.h index ea4a5ae84e7e306560e67c74f40075cd3cc5a883..1139f6ed83462e1fef9c72a749332f05f39410d5 100644 --- a/src/TNL/Functions/Analytic/PseudoSquare.h +++ b/src/TNL/Functions/Analytic/PseudoSquare.h @@ -50,8 +50,6 @@ class PseudoSquare< 1, Real > : public PseudoSquareBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, @@ -75,8 +73,6 @@ class PseudoSquare< 2, Real > : public PseudoSquareBase< Real, 2 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, @@ -100,8 +96,6 @@ class PseudoSquare< 3, Real > : public 
PseudoSquareBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - PseudoSquare(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/PseudoSquare_impl.h b/src/TNL/Functions/Analytic/PseudoSquare_impl.h index 5da33707a43307bf4b343384e924f15b5a1518b6..18edb0d347709369a750c3114dc1884a912b9d84 100644 --- a/src/TNL/Functions/Analytic/PseudoSquare_impl.h +++ b/src/TNL/Functions/Analytic/PseudoSquare_impl.h @@ -33,13 +33,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -PseudoSquare< 1, Real >::getType() -{ - return "Functions::Analytic::PseudoSquare< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > PseudoSquare< 1, Real >::PseudoSquare() { @@ -76,13 +69,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -PseudoSquare< 2, Real >::getType() -{ - return String( "Functions::Analytic::PseudoSquare< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > PseudoSquare< 2, Real >::PseudoSquare() { @@ -120,13 +106,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -PseudoSquare< 3, Real >::getType() -{ - return String( "Functions::Analytic::PseudoSquare< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > PseudoSquare< 3, Real >::PseudoSquare() { diff --git a/src/TNL/Functions/Analytic/Twins.h b/src/TNL/Functions/Analytic/Twins.h index c882ec4eb133c326195151b9d6db07098bec3735..775caf391c2b0f37dbabefbb48a07c80fdcea69d 100644 --- a/src/TNL/Functions/Analytic/Twins.h +++ b/src/TNL/Functions/Analytic/Twins.h @@ -46,8 +46,6 @@ class Twins< 1, Real > : public TwinsBase< Real, 1 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, @@ -73,8 +71,6 @@ class Twins< 2, Real > : public TwinsBase< Real, 2 > typedef 
Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, @@ -100,8 +96,6 @@ class Twins< 3, Real > : public TwinsBase< Real, 3 > typedef Real RealType; typedef Containers::StaticVector< Dimension, Real > PointType; - static String getType(); - Twins(); template< int XDiffOrder = 0, diff --git a/src/TNL/Functions/Analytic/Twins_impl.h b/src/TNL/Functions/Analytic/Twins_impl.h index 9e1cd81c185748cce2f038e0c55f154157056751..7b2ce41c7c2543a20d7b3750f406be241fca490c 100644 --- a/src/TNL/Functions/Analytic/Twins_impl.h +++ b/src/TNL/Functions/Analytic/Twins_impl.h @@ -31,13 +31,6 @@ setup( const Config::ParameterContainer& parameters, * 1D */ -template< typename Real > -String -Twins< 1, Real >::getType() -{ - return "Functions::Analytic::Twins< 1, " + TNL::getType< Real >() + String( " >" ); -} - template< typename Real > Twins< 1, Real >::Twins() { @@ -75,13 +68,6 @@ operator()( const PointType& v, /**** * 2D */ -template< typename Real > -String -Twins< 2, Real >::getType() -{ - return String( "Functions::Analytic::Twins< 2, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Twins< 2, Real >::Twins() { @@ -121,13 +107,6 @@ operator()( const PointType& v, /**** * 3D */ -template< typename Real > -String -Twins< 3, Real >::getType() -{ - return String( "Functions::Analytic::Twins< 3, " ) + TNL::getType< Real >() + " >"; -} - template< typename Real > Twins< 3, Real >::Twins() { diff --git a/src/TNL/Functions/Analytic/VectorNorm.h b/src/TNL/Functions/Analytic/VectorNorm.h index a9d292c5f625a22e15115fe6a69d1f5903525a91..583f3eebcb22fa815fa2624dbd73de3073498c73 100644 --- a/src/TNL/Functions/Analytic/VectorNorm.h +++ b/src/TNL/Functions/Analytic/VectorNorm.h @@ -144,8 +144,6 @@ class VectorNorm< 1, Real > : public VectorNormBase< 1, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, 
int YDiffOrder = 0, int ZDiffOrder = 0 > @@ -184,8 +182,6 @@ class VectorNorm< 2, Real > : public VectorNormBase< 2, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0 > @@ -232,8 +228,6 @@ class VectorNorm< 3, Real > : public VectorNormBase< 3, Real > using typename BaseType::RealType; using typename BaseType::PointType; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0 > diff --git a/src/TNL/Functions/CutMeshFunction.h b/src/TNL/Functions/CutMeshFunction.h index 4cad00d1b9f87a8c241422c2b29f4acd36d8458e..e727b15fcd92313ebc1010cebe32f5b4a74b35d2 100644 --- a/src/TNL/Functions/CutMeshFunction.h +++ b/src/TNL/Functions/CutMeshFunction.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Functions/MeshFunction.h> -#include <TNL/StaticVectorFor.h> +#include <TNL/Algorithms/StaticVectorFor.h> #include <TNL/Containers/StaticVector.h> namespace TNL { @@ -101,7 +101,7 @@ class CutMeshFunction typename OutMesh::CoordinatesType starts; starts.setValue(0); - StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel); + Algorithms::StaticVectorFor::exec(starts,outMesh.getDimensions(),kernel); } return inCut; diff --git a/src/TNL/Functions/FunctionAdapter.h b/src/TNL/Functions/FunctionAdapter.h index b9c35886689bf254eccfd4154469be71462107d3..b763ee47631d10522038b8dad9f927e83bc37a88 100644 --- a/src/TNL/Functions/FunctionAdapter.h +++ b/src/TNL/Functions/FunctionAdapter.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Functions/Domain.h> diff --git a/src/TNL/Functions/MeshFunction.h b/src/TNL/Functions/MeshFunction.h index f7d6749c9ab1bd7a8a850da97fc2b7d344f43dcd..2b7069c0fa45808d2e04ddf09709fe310cfdb18f 100644 --- a/src/TNL/Functions/MeshFunction.h +++ b/src/TNL/Functions/MeshFunction.h @@ -60,10 
+60,6 @@ class MeshFunction : Pointers::SharedPointer< Vector >& data, const IndexType& offset = 0 ); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Functions/MeshFunctionGnuplotWriter.h b/src/TNL/Functions/MeshFunctionGnuplotWriter.h index d747e84a75d6c75d4029b49761ad17efdaf72368..244146ff6d9eea06068f7ac1b61379236fac7e02 100644 --- a/src/TNL/Functions/MeshFunctionGnuplotWriter.h +++ b/src/TNL/Functions/MeshFunctionGnuplotWriter.h @@ -68,11 +68,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter : public MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -99,11 +98,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 2, Real, Device, Index >, EntityDimension > : public MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -137,11 +135,10 @@ template< typename MeshFunction, class MeshFunctionGnuplotWriter< 
MeshFunction, Meshes::Grid< 3, Real, Device, Index >, EntityDimension > : public MeshFunctionGnuplotWriterBase { - public: - - using MeshType = typename MeshFunction::MeshType; - using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; - using GlobalIndex = typename MeshType::GlobalIndexType; +public: + using MeshType = typename MeshFunction::MeshType; + using EntityType = typename MeshType::template EntityType< MeshFunction::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; static bool write( const MeshFunction& function, std::ostream& str, @@ -167,6 +164,5 @@ class MeshFunctionGnuplotWriter< MeshFunction, Meshes::Grid< 3, Real, Device, In } }; - } // namespace Functions } // namespace TNL diff --git a/src/TNL/Functions/MeshFunctionVTKWriter.h b/src/TNL/Functions/MeshFunctionVTKWriter.h index 78608de7461dc5510d280d29b9f7c329836e3eb8..201178c61197da4941f3c06af7914ec2428245b6 100644 --- a/src/TNL/Functions/MeshFunctionVTKWriter.h +++ b/src/TNL/Functions/MeshFunctionVTKWriter.h @@ -13,7 +13,7 @@ #include <TNL/Meshes/Writers/VTKWriter.h> namespace TNL { -namespace Functions { +namespace Functions { template< typename MeshFunction > class MeshFunctionVTKWriter diff --git a/src/TNL/Functions/MeshFunction_impl.h b/src/TNL/Functions/MeshFunction_impl.h index 908a31a09d5a69fa629ef0690df77ba00c17ea31..0131cbb25d653730f429306f8ae1a7226e4f41b9 100644 --- a/src/TNL/Functions/MeshFunction_impl.h +++ b/src/TNL/Functions/MeshFunction_impl.h @@ -92,30 +92,6 @@ MeshFunction( const MeshPointer& meshPointer, this->data.bind( *data, offset, getMesh().template getEntitiesCount< typename Mesh::template EntityType< MeshEntityDimension > >() ); } -template< typename Mesh, - int MeshEntityDimension, - typename Real > -String -MeshFunction< Mesh, MeshEntityDimension, Real >:: -getType() -{ - return String( "Functions::MeshFunction< " ) + - Mesh::getType() + ", " + - convertToString( MeshEntityDimension ) + ", " + - 
TNL::getType< Real >() + - " >"; -}; - -template< typename Mesh, - int MeshEntityDimension, - typename Real > -String -MeshFunction< Mesh, MeshEntityDimension, Real >:: -getTypeVirtual() const -{ - return this->getType(); -}; - template< typename Mesh, int MeshEntityDimension, typename Real > @@ -124,10 +100,10 @@ MeshFunction< Mesh, MeshEntityDimension, Real >:: getSerializationType() { return String( "Functions::MeshFunction< " ) + - Mesh::getSerializationType() + ", " + - convertToString( MeshEntityDimension ) + ", " + - TNL::getType< Real >() + - " >"; + TNL::getSerializationType< Mesh >() + ", " + + convertToString( MeshEntityDimension ) + ", " + + getType< Real >() + + " >"; }; template< typename Mesh, diff --git a/src/TNL/Functions/OperatorFunction.h b/src/TNL/Functions/OperatorFunction.h index 1f1e89b029f5d5e816e6f6df4bc6d9e9d27bb377..cc46d557a10cbfc315d04275d613de0ce679dce0 100644 --- a/src/TNL/Functions/OperatorFunction.h +++ b/src/TNL/Functions/OperatorFunction.h @@ -11,7 +11,7 @@ #pragma once #include <type_traits> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Functions/MeshFunction.h> #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h> diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h index e2bdce1f1c4a72848e82f10d9c270099121c28b7..918f24107d0e4a27a24d414d6cdcbea4f885cb45 100644 --- a/src/TNL/Functions/TestFunction_impl.h +++ b/src/TNL/Functions/TestFunction_impl.h @@ -11,6 +11,8 @@ #pragma once #include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/MemoryHelpers.h> + #include <TNL/Functions/Analytic/Constant.h> #include <TNL/Functions/Analytic/ExpBump.h> #include <TNL/Functions/Analytic/SinBumps.h> @@ -137,7 +139,7 @@ setupFunction( const Config::ParameterContainer& parameters, } if( std::is_same< Device, Devices::Cuda >::value ) { - this->function = Devices::Cuda::passToDevice( *auxFunction ); + this->function = Cuda::passToDevice( *auxFunction ); delete auxFunction; 
TNL_CHECK_CUDA_DEVICE; } @@ -166,7 +168,7 @@ setupOperator( const Config::ParameterContainer& parameters, } if( std::is_same< Device, Devices::Cuda >::value ) { - this->operator_ = Devices::Cuda::passToDevice( *auxOperator ); + this->operator_ = Cuda::passToDevice( *auxOperator ); delete auxOperator; TNL_CHECK_CUDA_DEVICE; } @@ -736,7 +738,7 @@ deleteFunction() if( std::is_same< Device, Devices::Cuda >::value ) { if( function ) - Devices::Cuda::freeFromDevice( ( FunctionType * ) function ); + Cuda::freeFromDevice( ( FunctionType * ) function ); } } @@ -756,7 +758,7 @@ deleteOperator() if( std::is_same< Device, Devices::Cuda >::value ) { if( operator_ ) - Devices::Cuda::freeFromDevice( ( OperatorType * ) operator_ ); + Cuda::freeFromDevice( ( OperatorType * ) operator_ ); } } @@ -912,7 +914,7 @@ printFunction( std::ostream& str ) const } if( std::is_same< Device, Devices::Cuda >::value ) { - Devices::Cuda::print( f, str ); + Cuda::print( f, str ); return str; } } diff --git a/src/TNL/Functions/VectorField.h b/src/TNL/Functions/VectorField.h index 4db601c9f4ccd9c003c46f41a112501953a73a76..4f06cd368f5ed55ef303cc53bcaadf3db7332fd1 100644 --- a/src/TNL/Functions/VectorField.h +++ b/src/TNL/Functions/VectorField.h @@ -113,19 +113,6 @@ class VectorField< Size, MeshFunction< Mesh, MeshEntityDimension, Real > > this->vectorField[ i ]->setMesh( meshPointer ); }; - static String getType() - { - return String( "Functions::VectorField< " ) + - convertToString( Size) + ", " + - FunctionType::getType() + - " >"; - } - - String getTypeVirtual() const - { - return this->getType(); - } - static String getSerializationType() { return String( "Functions::VectorField< " ) + diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter.h b/src/TNL/Functions/VectorFieldGnuplotWriter.h index 41b59d511d680568d62ed545a8f230efc43dd575..a1a63883e8387b1bbe94cb463ae2d39286105570 100644 --- a/src/TNL/Functions/VectorFieldGnuplotWriter.h +++ b/src/TNL/Functions/VectorFieldGnuplotWriter.h @@ -16,15 
+16,15 @@ namespace TNL { namespace Functions { template< int, typename > class VectorField; +template< typename, int, typename > class MeshFunction; template< typename VectorField > class VectorFieldGnuplotWriter { - public: - - static bool write( const VectorField& function, - std::ostream& str, - const double& scale ); +public: + static bool write( const VectorField& function, + std::ostream& str, + const double& scale ); }; /*** @@ -37,14 +37,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > > { - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -57,14 +57,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > > { - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 1, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, 
RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; @@ -78,14 +78,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -98,14 +98,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -118,14 +118,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 
0, Real > > > { - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 2, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; @@ -139,14 +139,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -159,14 +159,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const 
double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; /*** @@ -179,14 +179,14 @@ template< typename MeshReal, int VectorFieldSize > class VectorFieldGnuplotWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > > { - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); +public: + using MeshType = Meshes::Grid< 3, MeshReal, Device, MeshIndex >; + using RealType = Real; + using VectorFieldType = Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > >; + + static bool write( const VectorFieldType& function, + std::ostream& str, + const double& scale ); }; } // namespace Functions diff --git a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h index 500bdc4d8fbb0d3ece8c94ec6938c4c81a51b1c8..151ad5e7b178b7b0768ea794dbe0e03ca53b2f9a 100644 --- a/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h +++ b/src/TNL/Functions/VectorFieldGnuplotWriter_impl.h @@ -23,7 +23,7 @@ write( const VectorField& vectorField, std::ostream& str, const double& scale ) { - std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." << std::endl; + std::cerr << "Gnuplot writer for mesh vectorFields defined on mesh type " << getType< typename VectorField::MeshType >() << " is not (yet) implemented." 
<< std::endl; return false; } @@ -43,9 +43,8 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + auto& c = entity.getCoordinates(); + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -73,9 +72,8 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + auto& c = entity.getCoordinates(); + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -104,13 +102,10 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -142,15 +137,12 @@ write( const VectorFieldType& vectorField, typedef typename MeshType::Face EntityType; typedef typename EntityType::EntityOrientationType EntityOrientation; EntityType entity( mesh ); + auto& c = entity.getCoordinates(); entity.setOrientation( EntityOrientation( 1.0, 0.0 ) ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - 
entity.getCoordinates().y() ++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -163,15 +155,9 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 1.0 ) ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) - + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) - + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -202,13 +188,10 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -239,16 +222,11 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Cell entity( mesh ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < 
mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -280,18 +258,13 @@ write( const VectorFieldType& vectorField, typedef typename MeshType::Face EntityType; typedef typename EntityType::EntityOrientationType EntityOrientation; EntityType entity( mesh ); + auto& c = entity.getCoordinates(); entity.setOrientation( EntityOrientation( 1.0, 0.0, 0.0 ) ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() < mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() < mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -304,16 +277,10 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 1.0, 0.0 ) ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) { - for( 
entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -326,16 +293,10 @@ write( const VectorFieldType& vectorField, } entity.setOrientation( EntityOrientation( 0.0, 0.0, 1.0 ) ); - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() < mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + for( c.x() = 0; c.x() < mesh.getDimensions().x(); c.x()++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() < mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) + for( c.z() = 0; c.z() < mesh.getDimensions().z(); c.z()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -366,16 +327,11 @@ write( const VectorFieldType& vectorField, { const MeshType& mesh = vectorField.getMesh(); typename MeshType::Vertex entity( mesh ); - for( entity.getCoordinates().z() = 0; - entity.getCoordinates().z() <= mesh.getDimensions().z(); - entity.getCoordinates().z() ++ ) - for( entity.getCoordinates().y() = 0; - entity.getCoordinates().y() <= mesh.getDimensions().y(); - entity.getCoordinates().y() ++ ) + auto& c = entity.getCoordinates(); + for( c.z() = 0; c.z() <= mesh.getDimensions().z(); c.z()++ ) + for( c.y() = 0; c.y() <= mesh.getDimensions().y(); c.y()++ ) { - for( entity.getCoordinates().x() = 0; - entity.getCoordinates().x() <= mesh.getDimensions().x(); - entity.getCoordinates().x() ++ ) + for( c.x() = 0; c.x() <= mesh.getDimensions().x(); c.x()++ ) { entity.refresh(); typename MeshType::PointType v = entity.getCenter(); @@ -391,4 +347,3 @@ write( const VectorFieldType& vectorField, } // namespace Functions 
} // namespace TNL - diff --git a/src/TNL/Functions/VectorFieldVTKWriter.h b/src/TNL/Functions/VectorFieldVTKWriter.h index 6d8b1a8535b25e076c83e688706f37490086c39d..5eceea57fe21d52055294ca3f22a437988e39701 100644 --- a/src/TNL/Functions/VectorFieldVTKWriter.h +++ b/src/TNL/Functions/VectorFieldVTKWriter.h @@ -2,7 +2,7 @@ VectorFieldVTKWriter.h - description ------------------- begin : Jan 10, 2018 - copyright : (C) 2018 by oberhuber + copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -10,255 +10,52 @@ #pragma once -#include <TNL/Meshes/Grid.h> +#include <TNL/Meshes/Writers/VTKWriter.h> namespace TNL { namespace Functions { -template< int, typename > class VectorField; - template< typename VectorField > class VectorFieldVTKWriter { - public: - - static bool write( const VectorField& vectorField, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorField& vectorField, - std::ostream& str ){} - -}; - -/*** - * 1D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 1D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< 
VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 1, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - - -/*** - * 2D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 2, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 2D grids faces - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const 
VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 2D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 2, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - - -/*** - * 3D grids cells - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 3, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids faces - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, 
MeshFunction< MeshType, 2, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids edges - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 1, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - -}; - -/*** - * 3D grids vertices - */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -class VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > > -{ - public: - typedef Meshes::Grid< 3, MeshReal, Device, MeshIndex > MeshType; - typedef Real RealType; - typedef Functions::VectorField< VectorFieldSize, MeshFunction< MeshType, 0, RealType > > VectorFieldType; - using VectorType = typename VectorFieldType::VectorType; - - static bool write( const VectorFieldType& function, - std::ostream& str, - const double& scale ); - - static void writeHeader( const VectorFieldType& vectorField, - std::ostream& str ); - + using MeshType = typename VectorField::MeshType; + using MeshWriter = Meshes::Writers::VTKWriter< MeshType >; + using EntityType = typename MeshType::template EntityType< 
VectorField::getEntitiesDimension() >; + using GlobalIndex = typename MeshType::GlobalIndexType; + +public: + static bool write( const VectorField& field, + std::ostream& str, + const double& scale = 1.0, + const String& fieldName = "cellVectorFieldValues" ) + { + const MeshType& mesh = field.getMesh(); + MeshWriter::template writeEntities< VectorField::getEntitiesDimension() >( mesh, str ); + appendField( field, str, fieldName, scale ); + return true; + } + + // VTK supports writing multiple fields into the same file. + // You can call this after 'write', which initializes the mesh entities, + // with different field name. + static void appendField( const VectorField& field, + std::ostream& str, + const String& fieldName, + const double& scale = 1.0 ) + { + const MeshType& mesh = field.getMesh(); + const GlobalIndex entitiesCount = mesh.template getEntitiesCount< EntityType >(); + str << std::endl << "CELL_DATA " << entitiesCount << std::endl; + str << "VECTORS " << fieldName << " " << getType< typename VectorField::RealType >() << " 1" << std::endl; + for( GlobalIndex i = 0; i < entitiesCount; i++ ) { + const typename VectorField::VectorType vector = field.getElement( i ); + static_assert( VectorField::getVectorDimension() <= 3, "The VTK format supports only up to 3D vector fields." ); + for( int i = 0; i < 3; i++ ) + str << scale * ( i < vector.getSize() ? 
vector[ i ] : 0.0 ) << " "; + str << "\n"; + } + } }; } // namespace Functions } // namespace TNL - -#include <TNL/Functions/VectorFieldVTKWriter_impl.h> diff --git a/src/TNL/Functions/VectorFieldVTKWriter_impl.h b/src/TNL/Functions/VectorFieldVTKWriter_impl.h deleted file mode 100644 index 938227d22b57f61d2f6c4d5f4b7b13a9044d3aa8..0000000000000000000000000000000000000000 --- a/src/TNL/Functions/VectorFieldVTKWriter_impl.h +++ /dev/null @@ -1,881 +0,0 @@ -/*************************************************************************** - VectorFieldVTKWriter_impl.h - description - ------------------- - begin : Jan 10, 2018 - copyright : (C) 2018 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Functions/VectorFieldVTKWriter.h> -#include <TNL/Functions/VectorField.h> - -namespace TNL { -namespace Functions { - -template< typename VectorField > -bool -VectorFieldVTKWriter< VectorField >:: -write( const VectorField& vectorField, - std::ostream& str, - const double& scale ) -{ - std::cerr << "VTK writer for vector field defined on mesh type " << VectorField::MeshType::getType() << " is not (yet) implemented." 
<< std::endl; - return false; -} - -/**** - * 1D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType origin = mesh.getOrigin().x(); - const RealType spaceStep = mesh.getSpaceSteps().x(); - - str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl; - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << origin + i * spaceStep << " 0 0" << std::endl; - } - - str << std::endl << "CELLS " << mesh.getDimensions().x() << " " << mesh.getDimensions().x() * 3 << std::endl; - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "2 " << i << " " << i+1 << std::endl; - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() << std::endl; - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "3 " << std::endl; - } - - str << std::endl << "CELL_DATA " << mesh.getDimensions().x() << std::endl; - str << "VECTORS cellVectorFieldValues " << 
getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Cell >(); i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 1D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType origin = mesh.getOrigin().x(); - const RealType spaceStep = mesh.getSpaceSteps().x(); - - str << "POINTS " << mesh.getDimensions().x() + 1 << " float" << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << origin + i * spaceStep << " 0 0" << std::endl; - } - - str << std::endl << "CELLS " << mesh.getDimensions().x() + 1 << 
" " << ( mesh.getDimensions().x() + 1 ) * 2 << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << "1 " << i << std::endl; - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() + 1 << std::endl; - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << "1 " << std::endl; - } - - str << std::endl << "CELL_DATA " << mesh.getDimensions().x() + 1 << std::endl; - str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < mesh.template getEntitiesCount< typename MeshType::Vertex >(); i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 2, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const 
double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >(); - - str << "POINTS " << verticesCount << " " << getType< RealType >() << std::endl; - for (int j = 0; j < mesh.getDimensions().y() + 1; j++) - { - for (int i = 0; i < mesh.getDimensions().x() + 1; i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl; - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "4 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << j * ( mesh.getDimensions().x() + 1 )+ i + 1 << - " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - - str << std::endl << "CELL_TYPES " << mesh.getDimensions().x() * mesh.getDimensions().y() << std::endl; - for (int i = 0; i < mesh.getDimensions().x()*mesh.getDimensions().y(); i++) - { - str << "8 " << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS cellVectorFieldValues " << getType< RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - { - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ); - if( i < 2 ) - str << " "; - } - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, faces - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - typedef typename MeshType::template EntityType< 0 > Vertex; - typedef typename MeshType::template EntityType< 1 > Face; - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << originX + i * spaceStepX << " " << 
originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl; - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " << (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - - for (int j = 0; j < (mesh.getDimensions().y()+1); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "2 " << j * ( mesh.getDimensions().x() + 1 ) + i << " " <<j * ( mesh.getDimensions().x() + 1 ) + i + 1<< std::endl; - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "3" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS FaceslVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 2D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - typedef typename MeshType::template EntityType< 0 > Vertex; - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int j = 0; j < ( mesh.getDimensions().y() + 1); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " 0" << std::endl; - } - } - - str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl; - for (int j = 0; j < ( 
mesh.getDimensions().y() + 1 ); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "1 " << j * mesh.getDimensions().x() + i << std::endl; - } - } - - str << std::endl << "CELL_TYPES " << verticesCount << std::endl; - for (int i = 0; i < verticesCount; i++) - { - str << "1" << std::endl; - } - - str << std::endl << "CELL_DATA " << verticesCount << std::endl; - str << "VECTORS VerticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < verticesCount; i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, cells - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 3, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh 
= vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Cell >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << - entitiesCount * 9 << std::endl; - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "8 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( 
mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "11" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS cellVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Cell entity = mesh.template getEntity< typename MeshType::Cell >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, faces - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real > > >:: -write( const VectorFieldType& vectorField, - 
std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Face >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 5 << std::endl; - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { 
- str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "4 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i + 1<< std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "8" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS facesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Face entity = mesh.template getEntity< typename MeshType::Face >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, edges - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 1, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - const MeshIndex entitiesCount = mesh.template getEntitiesCount< typename MeshType::Edge >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << 
originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << entitiesCount << " " << entitiesCount * 3 << std::endl; - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i < mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i + 1 << std::endl; - } - } - } - - for (int k = 0; k <= mesh.getDimensions().z(); k++) - { - for (int j = 0; j < mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + (j+1) * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - for (int k = 0; k < mesh.getDimensions().z(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << "3 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << " " - << (k+1) * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << entitiesCount << std::endl; - for (int i = 0; i < entitiesCount; i++) - { - str << "3" << std::endl; - } - - str << std::endl << "CELL_DATA " << entitiesCount << std::endl; - str << "VECTORS edgesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < entitiesCount; i++ ) - { - typename MeshType::Edge entity = mesh.template 
getEntity< typename MeshType::Edge >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -/**** - * 3D grid, vertices - */ - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -void -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >:: -writeHeader( const VectorFieldType& vectorField, - std::ostream& str ) -{ - const MeshType& mesh = vectorField.getMesh(); - const typename MeshType::PointType& origin = mesh.getOrigin(); - const typename MeshType::PointType& proportions = mesh.getProportions(); - str << "# vtk DataFile Version 2.0" << std::endl; - str << "TNL DATA" << std::endl; - str << "ASCII" << std::endl; - str << "DATASET UNSTRUCTURED_GRID" << std::endl; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - int VectorFieldSize > -bool -VectorFieldVTKWriter< VectorField< VectorFieldSize, MeshFunction< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 0, Real > > >:: -write( const VectorFieldType& vectorField, - std::ostream& str, - const double& scale ) -{ - writeHeader(vectorField, str); - - const MeshType& mesh = vectorField.getMesh(); - const RealType originX = mesh.getOrigin().x(); - const RealType spaceStepX = mesh.getSpaceSteps().x(); - const RealType originY = mesh.getOrigin().y(); - const RealType spaceStepY = mesh.getSpaceSteps().y(); - const RealType originZ = mesh.getOrigin().z(); - const RealType spaceStepZ = mesh.getSpaceSteps().z(); - const MeshIndex verticesCount = mesh.template getEntitiesCount< typename MeshType::Vertex >(); - - str << "POINTS " << verticesCount << " float" << std::endl; - for (int k = 0; k <= mesh.getDimensions().y(); k++) - { - for (int j = 0; j <= mesh.getDimensions().y(); 
j++) - { - for (int i = 0; i <= mesh.getDimensions().x(); i++) - { - str << originX + i * spaceStepX << " " << originY + j * spaceStepY << " " << - originZ + k * spaceStepZ << std::endl; - } - } - } - - str << std::endl << "CELLS " << verticesCount << " " << verticesCount * 2 << std::endl; - for (int k = 0; k < ( mesh.getDimensions().z() + 1 ); k++) - { - for (int j = 0; j < ( mesh.getDimensions().y() + 1 ); j++) - { - for (int i = 0; i < ( mesh.getDimensions().x() + 1 ); i++) - { - str << "1 " << k * ( mesh.getDimensions().y() + 1 ) * ( mesh.getDimensions().x() + 1 ) + j * ( mesh.getDimensions().x() + 1 ) + i << std::endl; - } - } - } - - str << std::endl << "CELL_TYPES " << verticesCount << std::endl; - for (int i = 0; i < verticesCount; i++) - { - str << "1" << std::endl; - } - - str << std::endl << "CELL_DATA " << verticesCount << std::endl; - str << "VECTORS verticesVectorFieldValues " << getType< typename VectorFieldType::RealType >() << std::endl; - - for( MeshIndex i = 0; i < verticesCount; i++ ) - { - typename MeshType::Vertex entity = mesh.template getEntity< typename MeshType::Vertex >( i ); - entity.refresh(); - const VectorType v = vectorField.getElement( entity.getIndex() ); - for( int i = 0; i < 3; i++ ) - str << scale * ( i < VectorFieldSize ? 
v[ i ] : 0.0 ) << " "; - str << std::endl; - } - - return true; -} - -} // namespace Functions -} // namespace TNL diff --git a/src/TNL/Images/DicomSeries.h b/src/TNL/Images/DicomSeries.h index 36e626ab6691a32b3c207d25aac294600b84ac65..b5aa77a57d3a2c3322cc7d79b6a31ff7c22e68d4 100644 --- a/src/TNL/Images/DicomSeries.h +++ b/src/TNL/Images/DicomSeries.h @@ -14,10 +14,11 @@ #pragma once +#include <list> + #include <TNL/Containers/Array.h> -#include <TNL/Containers/List.h> #include <TNL/String.h> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> #include <TNL/Images//Image.h> #include <TNL/Images//DicomHeader.h> #include <TNL/Images//RegionOfInterest.h> @@ -33,10 +34,7 @@ #include <string> namespace TNL { - -template<> inline String getType< Images::DicomHeader * > () { return String( "DicomHeader *" ); } - -namespace Images { +namespace Images { struct WindowCenterWidth { @@ -105,7 +103,7 @@ class DicomSeries : public Image< int > bool loadImage( const String& filePath, int number ); - Containers::List< String > fileList; + std::list< String > fileList; Containers::Array<DicomHeader *,Devices::Host,int> dicomSeriesHeaders; diff --git a/src/TNL/Images/DicomSeries_impl.h b/src/TNL/Images/DicomSeries_impl.h index 350bf384bbadae0eeb1045e1553010953f6a9390..533808b0d53559aee3c56fbd268194fafc7ecd34 100644 --- a/src/TNL/Images/DicomSeries_impl.h +++ b/src/TNL/Images/DicomSeries_impl.h @@ -155,22 +155,22 @@ inline bool DicomSeries::retrieveFileList( const String& filePath) String fileNamePrefix(fileName.getString(), 0, fileName.getLength() - separatorPosition); struct dirent **dirp; - Containers::List<String > files; + std::list< String > files; //scan and sort directory int ndirs = scandir(directoryPath.getString(), &dirp, filter, alphasort); for(int i = 0 ; i < ndirs; ++i) { - files.Append( String((char *)dirp[i]->d_name)); + files.push_back( String((char *)dirp[i]->d_name) ); delete dirp[i]; } - for (int i = 0; i < files.getSize(); i++) + for (auto& file : files) { 
//check if file prefix contained - if (strstr(files[ i ].getString(), fileNamePrefix.getString())) + if (strstr(file.getString(), fileNamePrefix.getString())) { - fileList.Append( directoryPath + files[ i ] ); + fileList.push_back( directoryPath + file ); } } } @@ -182,7 +182,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number) #ifdef HAVE_DCMTK_H //load header DicomHeader *header = new DicomHeader(); - dicomSeriesHeaders.setSize( fileList.getSize() ); + dicomSeriesHeaders.setSize( fileList.size() ); dicomSeriesHeaders.setElement( number, header ); if( !header->loadFromFile( filePath ) ) return false; @@ -283,7 +283,7 @@ inline bool DicomSeries::loadImage( const String& filePath, int number) imagesInfo.frameSize = size; if (pixelData) delete pixelData; - pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.getSize()]; + pixelData = new Uint16[imagesInfo.frameUintsCount * fileList.size()]; } else {//check image size for compatibility @@ -328,13 +328,14 @@ inline bool DicomSeries::loadDicomSeries( const String& filePath ) } //load images - int imagesCountToLoad = fileList.getSize(); - for( int i=0; i < imagesCountToLoad; i++ ) + int counter = 0; + for( auto& file : fileList ) { - if( !loadImage( fileList[ i ].getString(),i ) ) + if( !loadImage( file.getString(), counter ) ) { - std::cerr << fileList[ i ] << " skipped"; + std::cerr << file << " skipped"; } + counter++; } return true; } diff --git a/src/TNL/Logger.h b/src/TNL/Logger.h index d1f6c5c678c7c80c241e99f7d67d13f24403dabc..efcbbb3b5a1db18df8ae59827d30d13185d39188 100644 --- a/src/TNL/Logger.h +++ b/src/TNL/Logger.h @@ -12,6 +12,7 @@ #include <ostream> +#include <TNL/String.h> #include <TNL/Config/ParameterContainer.h> namespace TNL { diff --git a/src/TNL/Logger_impl.h b/src/TNL/Logger_impl.h index 0e1dd8dc62434faf07b64a95f1f896ed9b8af940..6f71b40277515ce60ffd3a09082e140731f1a4b5 100644 --- a/src/TNL/Logger_impl.h +++ b/src/TNL/Logger_impl.h @@ -14,8 +14,8 @@ #include <iomanip> 
#include <TNL/Logger.h> -#include <TNL/Devices/CudaDeviceInfo.h> -#include <TNL/Devices/SystemInfo.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/SystemInfo.h> namespace TNL { @@ -61,24 +61,24 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) const char* compiler_name = "(unknown)"; #endif - writeParameter< String >( "Host name:", Devices::SystemInfo::getHostname() ); - writeParameter< String >( "System:", Devices::SystemInfo::getSystemName() ); - writeParameter< String >( "Release:", Devices::SystemInfo::getSystemRelease() ); - writeParameter< String >( "Architecture:", Devices::SystemInfo::getArchitecture() ); + writeParameter< String >( "Host name:", SystemInfo::getHostname() ); + writeParameter< String >( "System:", SystemInfo::getSystemName() ); + writeParameter< String >( "Release:", SystemInfo::getSystemRelease() ); + writeParameter< String >( "Architecture:", SystemInfo::getArchitecture() ); writeParameter< String >( "TNL compiler:", compiler_name ); // FIXME: generalize for multi-socket systems, here we consider only the first found CPU const int cpu_id = 0; - const int threads = Devices::SystemInfo::getNumberOfThreads( cpu_id ); - const int cores = Devices::SystemInfo::getNumberOfCores( cpu_id ); + const int threads = SystemInfo::getNumberOfThreads( cpu_id ); + const int cores = SystemInfo::getNumberOfCores( cpu_id ); int threadsPerCore = 0; if( cores > 0 ) threadsPerCore = threads / cores; writeParameter< String >( "CPU info", "" ); - writeParameter< String >( "Model name:", Devices::SystemInfo::getCPUModelName( cpu_id ), 1 ); + writeParameter< String >( "Model name:", SystemInfo::getCPUModelName( cpu_id ), 1 ); writeParameter< int >( "Cores:", cores, 1 ); writeParameter< int >( "Threads per core:", threadsPerCore, 1 ); - writeParameter< double >( "Max clock rate (in MHz):", Devices::SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 ); - const Devices::CacheSizes cacheSizes = Devices::SystemInfo::getCPUCacheSizes( 
cpu_id ); + writeParameter< double >( "Max clock rate (in MHz):", SystemInfo::getCPUMaxFrequency( cpu_id ) / 1000, 1 ); + const CacheSizes cacheSizes = SystemInfo::getCPUCacheSizes( cpu_id ); const String cacheInfo = convertToString( cacheSizes.L1data ) + ", " + convertToString( cacheSizes.L1instruction ) + ", " + convertToString( cacheSizes.L2 ) + ", " @@ -95,19 +95,19 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) // for( int i = 0; i < devices; i++ ) // { // logger.writeParameter< int >( "Device no.", i, 1 ); - const int i = Devices::CudaDeviceInfo::getActiveDevice(); - writeParameter< String >( "Name", Devices::CudaDeviceInfo::getDeviceName( i ), 2 ); - const String deviceArch = convertToString( Devices::CudaDeviceInfo::getArchitectureMajor( i ) ) + "." + - convertToString( Devices::CudaDeviceInfo::getArchitectureMinor( i ) ); + const int i = Cuda::DeviceInfo::getActiveDevice(); + writeParameter< String >( "Name", Cuda::DeviceInfo::getDeviceName( i ), 2 ); + const String deviceArch = convertToString( Cuda::DeviceInfo::getArchitectureMajor( i ) ) + "." 
+ + convertToString( Cuda::DeviceInfo::getArchitectureMinor( i ) ); writeParameter< String >( "Architecture", deviceArch, 2 ); - writeParameter< int >( "CUDA cores", Devices::CudaDeviceInfo::getCudaCores( i ), 2 ); - const double clockRate = ( double ) Devices::CudaDeviceInfo::getClockRate( i ) / 1.0e3; + writeParameter< int >( "CUDA cores", Cuda::DeviceInfo::getCudaCores( i ), 2 ); + const double clockRate = ( double ) Cuda::DeviceInfo::getClockRate( i ) / 1.0e3; writeParameter< double >( "Clock rate (in MHz)", clockRate, 2 ); - const double globalMemory = ( double ) Devices::CudaDeviceInfo::getGlobalMemory( i ) / 1.0e9; + const double globalMemory = ( double ) Cuda::DeviceInfo::getGlobalMemory( i ) / 1.0e9; writeParameter< double >( "Global memory (in GB)", globalMemory, 2 ); - const double memoryClockRate = ( double ) Devices::CudaDeviceInfo::getMemoryClockRate( i ) / 1.0e3; + const double memoryClockRate = ( double ) Cuda::DeviceInfo::getMemoryClockRate( i ) / 1.0e3; writeParameter< double >( "Memory clock rate (in Mhz)", memoryClockRate, 2 ); - writeParameter< bool >( "ECC enabled", Devices::CudaDeviceInfo::getECCEnabled( i ), 2 ); + writeParameter< bool >( "ECC enabled", Cuda::DeviceInfo::getECCEnabled( i ), 2 ); // } } return true; @@ -116,7 +116,7 @@ Logger::writeSystemInformation( const Config::ParameterContainer& parameters ) inline void Logger::writeCurrentTime( const char* label ) { - writeParameter< String >( label, Devices::SystemInfo::getCurrentTime() ); + writeParameter< String >( label, SystemInfo::getCurrentTime() ); } template< typename T > diff --git a/src/TNL/Math.h b/src/TNL/Math.h index cd73b020e4c35fbe08c969864c9c26e400bd76ef..321cc7ce39e0d0beb8c3c3c2a5ab3ac7bbfddbdd 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -15,7 +15,7 @@ #include <algorithm> #include <TNL/TypeTraits.h> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { @@ -30,7 +30,7 @@ ResultType sum( const T1& a, const T2& b ) * \brief 
This function returns minimum of two numbers. * * GPU device code uses the functions defined in the CUDA's math_functions.h, - * MIC uses trivial override and host uses the STL functions. + * host uses the STL functions. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions @@ -44,8 +44,6 @@ ResultType min( const T1& a, const T2& b ) #else #if defined(__CUDA_ARCH__) return ::min( (ResultType) a, (ResultType) b ); - #elif defined(__MIC__) - return a < b ? a : b; #else return std::min( (ResultType) a, (ResultType) b ); #endif @@ -57,7 +55,7 @@ ResultType min( const T1& a, const T2& b ) * \brief This function returns maximum of two numbers. * * GPU device code uses the functions defined in the CUDA's math_functions.h, - * MIC uses trivial override and host uses the STL functions. + * host uses the STL functions. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions @@ -71,8 +69,6 @@ ResultType max( const T1& a, const T2& b ) #else #if defined(__CUDA_ARCH__) return ::max( (ResultType) a, (ResultType) b ); - #elif defined(__MIC__) - return a > b ? 
a : b; #else return std::max( (ResultType) a, (ResultType) b ); #endif @@ -92,10 +88,6 @@ T abs( const T& n ) return ::abs( n ); else return ::fabs( n ); -#elif defined(__MIC__) - if( n < ( T ) 0 ) - return -n; - return n; #else return std::abs( n ); #endif @@ -159,7 +151,7 @@ template< typename T1, typename T2, typename ResultType = typename std::common_t __cuda_callable__ inline ResultType pow( const T1& base, const T2& exp ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::pow( (ResultType) base, (ResultType) exp ); #else return std::pow( (ResultType) base, (ResultType) exp ); @@ -173,7 +165,7 @@ template< typename T > __cuda_callable__ inline auto exp( const T& value ) -> decltype( std::exp(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::exp( value ); #else return std::exp( value ); @@ -187,7 +179,7 @@ template< typename T > __cuda_callable__ inline auto sqrt( const T& value ) -> decltype( std::sqrt(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sqrt( value ); #else return std::sqrt( value ); @@ -201,7 +193,7 @@ template< typename T > __cuda_callable__ inline auto cbrt( const T& value ) -> decltype( std::cbrt(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cbrt( value ); #else return std::cbrt( value ); @@ -215,7 +207,7 @@ template< typename T > __cuda_callable__ inline auto log( const T& value ) -> decltype( std::log(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log( value ); #else return std::log( value ); @@ -229,7 +221,7 @@ template< typename T > __cuda_callable__ inline auto log10( const T& value ) -> decltype( std::log10(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log10( value ); #else return std::log10( value ); @@ -243,7 +235,7 @@ template< typename T > __cuda_callable__ 
inline auto log2( const T& value ) -> decltype( std::log2(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::log2( value ); #else return std::log2( value ); @@ -257,7 +249,7 @@ template< typename T > __cuda_callable__ inline auto sin( const T& value ) -> decltype( std::sin(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sin( value ); #else return std::sin( value ); @@ -271,7 +263,7 @@ template< typename T > __cuda_callable__ inline auto cos( const T& value ) -> decltype( std::cos(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cos( value ); #else return std::cos( value ); @@ -285,7 +277,7 @@ template< typename T > __cuda_callable__ inline auto tan( const T& value ) -> decltype( std::tan(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::tan( value ); #else return std::tan( value ); @@ -299,7 +291,7 @@ template< typename T > __cuda_callable__ inline auto asin( const T& value ) -> decltype( std::asin(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::asin( value ); #else return std::asin( value ); @@ -313,7 +305,7 @@ template< typename T > __cuda_callable__ inline auto acos( const T& value ) -> decltype( std::acos(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::acos( value ); #else return std::acos( value ); @@ -327,7 +319,7 @@ template< typename T > __cuda_callable__ inline auto atan( const T& value ) -> decltype( std::atan(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::atan( value ); #else return std::atan( value ); @@ -341,7 +333,7 @@ template< typename T > __cuda_callable__ inline auto sinh( const T& value ) -> decltype( std::sinh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::sinh( value ); 
#else return std::sinh( value ); @@ -355,7 +347,7 @@ template< typename T > __cuda_callable__ inline auto cosh( const T& value ) -> decltype( std::cosh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::cosh( value ); #else return std::cosh( value ); @@ -369,7 +361,7 @@ template< typename T > __cuda_callable__ inline auto tanh( const T& value ) -> decltype( std::tanh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::tanh( value ); #else return std::tanh( value ); @@ -383,7 +375,7 @@ template< typename T > __cuda_callable__ inline auto asinh( const T& value ) -> decltype( std::asinh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::asinh( value ); #else return std::asinh( value ); @@ -397,7 +389,7 @@ template< typename T > __cuda_callable__ inline auto acosh( const T& value ) -> decltype( std::acosh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::acosh( value ); #else return std::acosh( value ); @@ -411,7 +403,7 @@ template< typename T > __cuda_callable__ inline auto atanh( const T& value ) -> decltype( std::atanh(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::atanh( value ); #else return std::atanh( value ); @@ -425,7 +417,7 @@ template< typename T > __cuda_callable__ inline auto floor( const T& value ) -> decltype( std::floor(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::floor( value ); #else return std::floor( value ); @@ -439,7 +431,7 @@ template< typename T > __cuda_callable__ inline auto ceil( const T& value ) -> decltype( std::ceil(value) ) { -#if defined(__CUDA_ARCH__) || defined(__MIC__) +#if defined(__CUDA_ARCH__) return ::ceil( value ); #else return std::ceil( value ); diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h index 
dd6618d5e40e481e453c46b29b0988a067046b76..a50a17232fb184086930e69834ea4d498d9ab5a2 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/AdEllpack.h @@ -84,14 +84,13 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef AdEllpack< Real, Devices::Host, Index > HostType; - typedef AdEllpack< Real, Devices::Cuda, Index > CudaType; - AdEllpack(); - - static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = AdEllpack< _Real, _Device, _Index >; - String getTypeVirtual() const; + AdEllpack(); void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index 12d7336b8f7ea3b46ace0df151475df354537ced..a0f293b3df94afcfeda8124f8e1d8173cb4c7718 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -11,7 +11,7 @@ #include <TNL/Matrices/AdEllpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> #pragma once @@ -157,26 +157,6 @@ AdEllpack< Real, Device, Index >::AdEllpack() warpSize( 32 ) {} -template< typename Real, - typename Device, - typename Index > -String AdEllpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String AdEllpack< Real, Device, Index >::getType() -{ - return String( "AdEllpack< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getDeviceType() + - String( " >" ); -} - template< typename Real, typename Device, typename Index > @@ -956,14 +936,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda2( const InVector& 
inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 256; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1004,14 +984,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda4( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 192; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1063,14 +1043,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 128; - Real* 
temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1121,14 +1101,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda16( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 128; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1179,14 +1159,14 @@ void AdEllpack< Real, Device, Index >::spmvCuda32( const InVector& inVector, OutVector& outVector, const int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; IndexType warpIdx = globalIdx >> 5; IndexType inWarpIdx = globalIdx & ( this->warpSize - 1 ); if( globalIdx >= this->reduceMap.getSize() ) return; const int blockSize = 96; - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ IndexType reduceMap[ blockSize ]; reduceMap[ threadIdx.x ] = this->reduceMap[ globalIdx ]; temp[ threadIdx.x ] = 0.0; @@ -1312,18 +1292,18 @@ public: { typedef AdEllpack< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = 
Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); if( matrix.totalLoad < 2 ) { - dim3 blockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda2< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1333,20 +1313,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 4 ) { - dim3 blockSize( 192 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 192 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x 
= cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda4< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1356,20 +1336,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 8 ) { - dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda8< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1379,20 +1359,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else if( matrix.totalLoad < 16 ) { - dim3 blockSize( 128 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType 
cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda16< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1402,20 +1382,20 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } else { - dim3 blockSize( 96 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 blockSize( 96 ), cudaGridSize( Cuda::getMaxGridSize() ); IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x ); - IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = blockSize.x * sizeof( Real ); AdEllpackVectorProductCuda32< Real, Index, InVector, OutVector > <<< cudaGridSize, blockSize, sharedMemory >>> @@ -1425,9 +1405,9 @@ public: gridIdx ); } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + 
Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } } diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/BiEllpack.h index 08bb5366671ebbf6abb208d213027914a765ad2e..cfc132ccd56318a0c160d6eab943fc5de90b7c7c 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/BiEllpack.h @@ -39,14 +39,13 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef BiEllpack< Real, Devices::Host, Index > HostType; - typedef BiEllpack< Real, Devices::Cuda, Index > CudaType; - BiEllpack(); - - static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = BiEllpack< _Real, _Device, _Index >; - String getTypeVirtual() const; + BiEllpack(); void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/BiEllpackSymmetric.h index 0d2ae9f1e64d42a13d6849e185048b974a9ac61b..8a845a08372c0647af36c911ec59b79e6e857747 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric.h +++ b/src/TNL/Matrices/BiEllpackSymmetric.h @@ -30,14 +30,13 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef BiEllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > CudaType; - BiEllpackSymmetric(); - - static String getType(); + template< typename 
_Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = BiEllpackSymmetric< _Real, _Device, _Index >; - String getTypeVirtual() const; + BiEllpackSymmetric(); void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index d4c755a236fa9833dbed19d7f70223cc6c7a0608..0af180c0e8c2c54d2c4fdb304fa3e2813d76786c 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -45,28 +45,6 @@ BiEllpackSymmetric< Real, Device, Index, StripSize >::BiEllpackSymmetric() logWarpSize( 5 ) {} -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType() -{ - return String( "BiEllpackMatrix< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device :: getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpackSymmetric< Real, Device, Index, StripSize >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index, @@ -1075,7 +1053,7 @@ void BiEllpackSymmetric< Real, Device, Index, StripSize >::spmvCuda( const InVec IndexType bisection = this->warpSize; IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ Real results[ cudaBlockSize ]; results[ threadIdx.x ] = 0.0; IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx; @@ -1296,7 +1274,7 @@ void BiEllpackSymmetricVectorProductCuda( const BiEllpackSymmetric< Real, Device int gridIdx, const int warpSize ) { - Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + Index globalIdx = ( gridIdx * 
Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx ); } #endif @@ -1416,7 +1394,7 @@ void performRowBubbleSortCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, S const typename BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize >::RowLengthsVector* rowLengths, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx ); } #endif @@ -1431,7 +1409,7 @@ void computeColumnSizesCuda( BiEllpackSymmetric< Real, Devices::Cuda, Index, Str const Index numberOfStrips, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx ); } #endif @@ -1535,23 +1513,23 @@ public: Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = 
roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); performRowBubbleSortCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1566,15 +1544,15 @@ public: const Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpackSymmetric< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); computeColumnSizesCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -1582,8 +1560,8 @@ public: numberOfStrips, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( 
kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1601,16 +1579,16 @@ public: #ifdef HAVE_CUDA typedef BiEllpackSymmetric< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); BiEllpackSymmetricVectorProductCuda< Real, Index, StripSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -1620,9 +1598,9 @@ public: gridIdx, matrix.warpSize ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 
0be6ac4b068111b8dbb424142e16ed19f6de2477..51646152e8e62d8c26fb55a961869b8acef7826e 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -47,28 +47,6 @@ BiEllpack< Real, Device, Index, StripSize >::BiEllpack() logWarpSize( 5 ) {} -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpack< Real, Device, Index, StripSize >::getType() -{ - return String( "BiEllpack< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int StripSize > -String BiEllpack< Real, Device, Index, StripSize >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index, @@ -116,7 +94,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); - this->groupPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->groupPointers.template scan< Algorithms::ScanType::Exclusive >(); // uncomment to perform structure test //DeviceDependentCode::verifyRowPerm( *this, rowLengths ); @@ -1079,7 +1057,7 @@ void BiEllpack< Real, Device, Index, StripSize >::spmvCuda( const InVector& inVe IndexType bisection = this->warpSize; IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - Real* temp = Devices::Cuda::getSharedMemory< Real >(); + Real* temp = Cuda::getSharedMemory< Real >(); __shared__ Real results[ cudaBlockSize ]; results[ threadIdx.x ] = 0.0; IndexType elementPtr = ( this->groupPointers[ groupBegin ] << this->logWarpSize ) + inWarpIdx; @@ -1299,7 +1277,7 @@ void BiEllpackVectorProductCuda( const BiEllpack< Real, Devices::Cuda, Index, St int gridIdx, const int warpSize ) { - Index globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + 
threadIdx.x; + Index globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx ); } #endif @@ -1419,7 +1397,7 @@ void performRowBubbleSortCuda( BiEllpack< Real, Devices::Cuda, Index, StripSize const typename BiEllpack< Real, Devices::Cuda, Index, StripSize >::CompressedRowLengthsVector* rowLengths, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->performRowBubbleSortCudaKernel( *rowLengths, stripIdx ); } #endif @@ -1434,7 +1412,7 @@ void computeColumnSizesCuda( BiEllpack< Real, Devices::Cuda, Index, StripSize >* const Index numberOfStrips, int gridIdx ) { - const Index stripIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index stripIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeColumnSizesCudaKernel( *rowLengths, numberOfStrips, stripIdx ); } #endif @@ -1538,23 +1516,23 @@ public: Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, 
Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); performRowBubbleSortCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1569,15 +1547,15 @@ public: const Index numberOfStrips = matrix.virtualRows / StripSize; typedef BiEllpack< Real, Devices::Cuda, Index, StripSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + CompressedRowLengthsVector* kernel_rowLengths = Cuda::passToDevice( rowLengths ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfStrips, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); computeColumnSizesCuda< Real, Index, StripSize > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -1585,8 +1563,8 @@ public: numberOfStrips, gridIdx ); } - 
Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1604,16 +1582,16 @@ public: #ifdef HAVE_CUDA typedef BiEllpack< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); BiEllpackVectorProductCuda< Real, Index, StripSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -1623,9 +1601,9 @@ public: gridIdx, matrix.warpSize ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/COOMatrix.h 
b/src/TNL/Matrices/COOMatrix.h index e5a4a0fd94634a627cfb0080e0700896f2c98f5c..c5ce76244dcb54b415e38ab57b1fa5e11cbeeab8 100644 --- a/src/TNL/Matrices/COOMatrix.h +++ b/src/TNL/Matrices/COOMatrix.h @@ -35,14 +35,13 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef COOMatrix< Real, Devices::Host, Index > HostType; - typedef COOMatrix< Real, Devices::Cuda, Index > CudaType; - COOMatrix(); - - static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = COOMatrix< _Real, _Device, _Index >; - String getTypeVirtual() const; + COOMatrix(); bool setDimensions(const IndexType rows, const IndexType columns); diff --git a/src/TNL/Matrices/COOMatrix_impl.h b/src/TNL/Matrices/COOMatrix_impl.h index 090ccd1180349e41e02093ed740aef79b049b14c..bbdd36002ee4af0ca59da81815ed2527c0c0c828 100644 --- a/src/TNL/Matrices/COOMatrix_impl.h +++ b/src/TNL/Matrices/COOMatrix_impl.h @@ -12,7 +12,7 @@ #include <TNL/Matrices/COOMatrix.h> #include <TNL/Math.h> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> namespace TNL { namespace Matrices { @@ -27,26 +27,6 @@ COOMatrix< Real, Device, Index >::COOMatrix() { }; -template< typename Real, - typename Device, - typename Index > -String COOMatrix< Real, Device, Index >::getType() -{ - return String("COOMatrix< ") + - String(TNL::getType< Real>()) + - String(", ") + - Device::getDeviceType() + - String(" >"); -} - -template< typename Real, - typename Device, - typename Index > -String COOMatrix< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index 
64e202a67716a9710d65505c6c81d7e6069ce9a9..485176d1d849b4be2c296a0f131f5ee2299f89f2 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -49,20 +49,19 @@ public: using IndexType = Index; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef CSR< Real, Devices::Host, Index > HostType; - typedef CSR< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; using MatrixRow = typename BaseType::MatrixRow; using ConstMatrixRow = typename BaseType::ConstMatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = CSR< _Real, _Device, _Index >; + enum SPMVCudaKernel { scalar, vector, hybrid }; CSR(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 74ff682fdaaa94d0b32f8b02375f0b9678f21307..327d250028acca4349495bd663340f999f55024e 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -33,7 +33,7 @@ template< typename Real, typename Index > CSR< Real, Device, Index >::CSR() : spmvCudaKernel( hybrid ), - cudaWarpSize( 32 ), //Devices::Cuda::getWarpSize() ) + cudaWarpSize( 32 ), //Cuda::getWarpSize() ) hybridModeSplit( 4 ) { }; @@ -41,31 +41,15 @@ CSR< Real, Device, Index >::CSR() template< typename Real, typename Device, typename Index > -String CSR< Real, Device, Index >::getType() +String CSR< Real, Device, Index >::getSerializationType() { return String( "Matrices::CSR< ") + - String( TNL::getType< Real>() ) + + TNL::getType< Real>() + String( ", " ) + - Device :: getDeviceType() + + getType< Devices::Host >() + String( " >" ); } -template< typename Real, - typename 
Device, - typename Index > -String CSR< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String CSR< Real, Device, Index >::getSerializationType() -{ - return HostType::getType(); -} - template< typename Real, typename Device, typename Index > @@ -104,7 +88,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); - this->rowPointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->rowPointers.template scan< Algorithms::ScanType::Exclusive >(); this->maxRowLength = max( rowLengths ); /**** @@ -161,16 +145,16 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con // // (gdb) p rowPointers.getElement(0) // // Attempt to take address of value not located in memory. // IndexType resultHost ( 0 ); -// IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost ); +// IndexType* resultCuda = Cuda::passToDevice( resultHost ); // // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown: // // 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list' // TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately // delete []cols; // delete []vals; // std::cout << "Checkpoint BEFORE passFromDevice" << std::endl; -// resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE +// resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE // std::cout << "Checkpoint AFTER passFromDevice" << 
std::endl; -// Devices::Cuda::freeFromDevice( resultCuda ); +// Cuda::freeFromDevice( resultCuda ); // return resultHost; // } } @@ -729,7 +713,7 @@ void CSR< Real, Device, Index >::spmvCudaVectorized( const InVector& inVector, const IndexType warpEnd, const IndexType inWarpIdx ) const { - volatile Real* aux = Devices::Cuda::getSharedMemory< Real >(); + volatile Real* aux = Cuda::getSharedMemory< Real >(); for( IndexType row = warpStart; row < warpEnd; row++ ) { aux[ threadIdx.x ] = 0.0; @@ -769,7 +753,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector, OutVector& outVector, int gridIdx ) const { - IndexType globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + IndexType globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; const IndexType warpStart = warpSize * ( globalIdx / warpSize ); const IndexType warpEnd = min( warpStart + warpSize, this->getRows() ); const IndexType inWarpIdx = globalIdx % warpSize; @@ -780,7 +764,7 @@ void CSR< Real, Device, Index >::vectorProductCuda( const InVector& inVector, /**** * Hybrid mode */ - const Index firstRow = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x; + const Index firstRow = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x; const IndexType lastRow = min( this->getRows(), firstRow + blockDim. 
x ); const IndexType nonzerosPerRow = ( this->rowPointers[ lastRow ] - this->rowPointers[ firstRow ] ) / ( lastRow - firstRow ); @@ -831,38 +815,6 @@ class CSRDeviceDependentCode< Devices::Host > }; -#ifdef HAVE_MIC -template<> -class CSRDeviceDependentCode< Devices::MIC > -{ - public: - - typedef Devices::MIC Device; - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const CSR< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - throw Exceptions::NotImplementedError("CSRDeviceDependentCode is not implemented for MIC."); - } - /* const Index rows = matrix.getRows(); - const tnlCSRMatrix< Real, Device, Index >* matrixPtr = &matrix; - const InVector* inVectorPtr = &inVector; - OutVector* outVectorPtr = &outVector; -#ifdef HAVE_OPENMP -#pragma omp parallel for firstprivate( matrixPtr, inVectorPtr, outVectorPtr ), schedule(static ), if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < rows; row ++ ) - ( *outVectorPtr )[ row ] = matrixPtr->rowVectorProduct( row, *inVectorPtr ); - }*/ - -}; -#endif - #ifdef HAVE_CUDA template< typename Real, typename Index, @@ -876,7 +828,7 @@ __global__ void CSRVectorProductCudaKernel( const CSR< Real, Devices::Cuda, Inde { typedef CSR< Real, Devices::Cuda, Index > Matrix; static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( matrix->getCudaKernelType() == Matrix::scalar ) { if( rowIdx < matrix->getRows() ) @@ -902,17 +854,17 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix, #ifdef HAVE_CUDA typedef CSR< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* 
kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); TNL_CHECK_CUDA_DEVICE; - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); if( matrix.getCudaWarpSize() == 32 ) CSRVectorProductCudaKernel< Real, Index, InVector, OutVector, 32 > @@ -959,9 +911,9 @@ void CSRVectorProductCuda( const CSR< Real, Devices::Cuda, Index >& matrix, } TNL_CHECK_CUDA_DEVICE; - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h index a6f06e79745d02f308604fd2bcba46fbeeed497c..a66e1283ab6b8df5e763de35e6a6ac2c14a70bf9 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/ChunkedEllpack.h @@ -44,9 +44,6 @@ struct tnlChunkedEllpackSliceInfo IndexType chunkSize; IndexType firstRow; IndexType pointer; - - 
static inline String getType() - { return String( "tnlChunkedEllpackSliceInfo" ); }; }; #ifdef HAVE_CUDA @@ -78,17 +75,16 @@ public: typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef ChunkedEllpack< Real, Devices::Host, Index > HostType; - typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; - ChunkedEllpack(); - - static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = ChunkedEllpack< _Real, _Device, _Index >; - String getTypeVirtual() const; + ChunkedEllpack(); static String getSerializationType(); diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 6106ba2cdb0ba474ab9ab80b1b91fd837d33a033..48119c659163d9f57bb3feefe58210f48666b224 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -39,31 +39,15 @@ ChunkedEllpack< Real, Device, Index >::ChunkedEllpack() template< typename Real, typename Device, typename Index > -String ChunkedEllpack< Real, Device, Index >::getType() +String ChunkedEllpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::ChunkedEllpack< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device :: getDeviceType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String ChunkedEllpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - 
typename Index > -String ChunkedEllpack< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index > @@ -248,7 +232,7 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre this->rowPointers.setElement( 0, 0 ); for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) this->setSlice( rowLengths, sliceIndex, elementsToAllocation ); - this->rowPointers.prefixSum(); + this->rowPointers.scan(); } // std::cout << "\ngetRowLength after first if: " << std::endl; @@ -314,7 +298,7 @@ template< typename Real, Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); // IndexType elementCount ( 0 ); // ConstMatrixRow matrixRow = this->getRow( row ); @@ -1139,7 +1123,7 @@ __device__ void ChunkedEllpack< Real, Device, Index >::computeSliceVectorProduct { static_assert( std::is_same < DeviceType, Devices::Cuda >::value, "" ); - RealType* chunkProducts = Devices::Cuda::getSharedMemory< RealType >(); + RealType* chunkProducts = Cuda::getSharedMemory< RealType >(); ChunkedEllpackSliceInfo* sliceInfo = ( ChunkedEllpackSliceInfo* ) & chunkProducts[ blockDim.x ]; if( threadIdx.x == 0 ) @@ -1349,7 +1333,7 @@ void ChunkedEllpack< Real, Device, Index >::printStructure( std::ostream& str, const String& name ) const { const IndexType numberOfSlices = this->getNumberOfSlices(); - str << "Matrix type: " << getType() << std::endl + str << "Matrix type: " << getType( *this ) << std::endl << "Marix name: " << name << std::endl << "Rows: " << this->getRows() << std::endl << "Columns: " << this->getColumns() << std::endl @@ -1419,7 +1403,7 @@ __global__ void ChunkedEllpackVectorProductCudaKernel( const ChunkedEllpack< Rea OutVector* outVector, int gridIdx ) { - 
const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() + blockIdx.x; if( sliceIdx < matrix->getNumberOfSlices() ) matrix->computeSliceVectorProduct( inVector, outVector, sliceIdx ); @@ -1472,19 +1456,19 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda > typedef ChunkedEllpack< Real, Devices::Cuda, Index > Matrix; typedef Index IndexType; typedef Real RealType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); dim3 cudaBlockSize( matrix.getNumberOfChunksInSlice() ), - cudaGridSize( Devices::Cuda::getMaxGridSize() ); + cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = matrix.getNumberOfSlices(); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ) + sizeof( tnlChunkedEllpackSliceInfo< IndexType > ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); ChunkedEllpackVectorProductCudaKernel< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( kernel_this, @@ -1492,9 +1476,9 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( 
kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 8c21e33b0f02300c7223ce420125f91bc064699d..c469927234cd835bef7bcfe36599a47cb843b6cc 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -41,18 +41,16 @@ public: typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Dense< Real, Devices::Host, Index > HostType; - typedef Dense< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef DenseRow< Real, Index > MatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; Dense(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index 5b55dbc292b76e63d28c6039ff6f552f89a05183..246bd09edb459e6df9749af9d1589f508c2c5806 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -24,31 +24,15 @@ Dense< Real, Device, Index >::Dense() { } -template< typename Real, - typename Device, - typename Index > -String Dense< Real, Device, Index >::getType() -{ - return String( "Matrices::Dense< " ) + - String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Dense< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Dense< Real, 
Device, Index >::getSerializationType() { - return getType(); + return String( "Matrices::Dense< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; } template< typename Real, @@ -602,20 +586,20 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); cudaBlockSize.x = cudaBlockColumns; cudaBlockSize.y = cudaBlockRows; - const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() ); + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) { - cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); if( gridIdx_x == columnGrids - 1 ) - cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); if( gridIdx_y == rowGrids - 1 ) - cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize(); - Dense* this_kernel = Devices::Cuda::passToDevice( *this ); - Matrix1* matrix1_kernel = Devices::Cuda::passToDevice( matrix1 ); - Matrix2* matrix2_kernel = Devices::Cuda::passToDevice( matrix2 ); + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + Dense* this_kernel = Cuda::passToDevice( *this ); + Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 ); + Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 ); DenseMatrixProductKernel< Real, Index, Matrix1, @@ -632,9 +616,9 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, matrix2Multiplicator, gridIdx_x, gridIdx_y ); - Devices::Cuda::freeFromDevice( this_kernel 
); - Devices::Cuda::freeFromDevice( matrix1_kernel ); - Devices::Cuda::freeFromDevice( matrix2_kernel ); + Cuda::freeFromDevice( this_kernel ); + Cuda::freeFromDevice( matrix1_kernel ); + Cuda::freeFromDevice( matrix2_kernel ); } #endif } @@ -685,7 +669,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind rowBlock < tileDim; rowBlock += tileRowBlockSize ) { - tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = inputMatrix->getElementFast( readColumnPosition, readRowPosition + rowBlock ); } @@ -704,7 +688,7 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind { resultMatrix->setElementFast( writeColumnPosition, writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); } @@ -757,7 +741,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, rowBlock += tileRowBlockSize ) { if( readRowPosition + rowBlock < rows ) - tile[ Devices::Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = inputMatrix->getElementFast( readColumnPosition, readRowPosition + rowBlock ); } @@ -781,7 +765,7 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, if( writeRowPosition + rowBlock < columns ) resultMatrix->setElementFast( writeColumnPosition, writeRowPosition + rowBlock, - matrixMultiplicator * tile[ Devices::Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); } } @@ -825,21 +809,21 @@ void Dense< Real, Device, 
Index >::getTransposition( const Matrix& matrix, const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); cudaBlockSize.x = cudaBlockColumns; cudaBlockSize.y = cudaBlockRows; - const IndexType rowGrids = roundUpDivision( rowTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType columnGrids = roundUpDivision( columnTiles, Devices::Cuda::getMaxGridSize() ); - const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Devices::Cuda::getNumberOfSharedMemoryBanks(); + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks(); - Dense* this_device = Devices::Cuda::passToDevice( *this ); - Matrix* matrix_device = Devices::Cuda::passToDevice( matrix ); + Dense* this_device = Cuda::passToDevice( *this ); + Matrix* matrix_device = Cuda::passToDevice( matrix ); for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) { - cudaGridSize.x = cudaGridSize.y = Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); if( gridIdx_x == columnGrids - 1) - cudaGridSize.x = columnTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); if( gridIdx_y == rowGrids - 1 ) - cudaGridSize.y = rowTiles % Devices::Cuda::getMaxGridSize(); + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) && ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) ) { @@ -875,8 +859,8 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, } TNL_CHECK_CUDA_DEVICE; } - Devices::Cuda::freeFromDevice( this_device ); - Devices::Cuda::freeFromDevice( matrix_device ); + Cuda::freeFromDevice( this_device ); + 
Cuda::freeFromDevice( matrix_device ); #endif } } diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h index 72586dbb3814b95d08cf746c4e32d5704185db2f..76b6ea8c1d5173ee8d0cd85421d919085fe590e5 100644 --- a/src/TNL/Matrices/DistributedMatrix.h +++ b/src/TNL/Matrices/DistributedMatrix.h @@ -54,14 +54,17 @@ public: using CommunicatorType = Communicator; using LocalRangeType = Containers::Subrange< typename Matrix::IndexType >; - using HostType = DistributedMatrix< typename Matrix::HostType, Communicator >; - using CudaType = DistributedMatrix< typename Matrix::CudaType, Communicator >; - using CompressedRowLengthsVector = Containers::DistributedVector< IndexType, DeviceType, IndexType, CommunicatorType >; using MatrixRow = Matrices::SparseRow< RealType, IndexType >; using ConstMatrixRow = Matrices::SparseRow< std::add_const_t< RealType >, std::add_const_t< IndexType > >; + template< typename _Real = RealType, + typename _Device = DeviceType, + typename _Index = IndexType, + typename _Communicator = Communicator > + using Self = DistributedMatrix< typename MatrixType::template Self< _Real, _Device, _Index >, _Communicator >; + DistributedMatrix() = default; DistributedMatrix( DistributedMatrix& ) = default; @@ -80,13 +83,6 @@ public: const Matrix& getLocalMatrix() const; - static String getType(); - - virtual String getTypeVirtual() const; - - // TODO: no getSerializationType method until there is support for serialization - - /* * Some common Matrix methods follow below. 
*/ diff --git a/src/TNL/Matrices/DistributedMatrix_impl.h b/src/TNL/Matrices/DistributedMatrix_impl.h index 33eeef26458cf1c69a744578400f83dc23782466..c1a13a713391f4231b41191fbedb5aa1cb4050c7 100644 --- a/src/TNL/Matrices/DistributedMatrix_impl.h +++ b/src/TNL/Matrices/DistributedMatrix_impl.h @@ -71,28 +71,6 @@ getLocalMatrix() const } -template< typename Matrix, - typename Communicator > -String -DistributedMatrix< Matrix, Communicator >:: -getType() -{ - return String( "Matrices::DistributedMatrix< " ) + - Matrix::getType() + ", " + - // TODO: communicators don't have a getType method - "<Communicator>" + " >"; -} - -template< typename Matrix, - typename Communicator > -String -DistributedMatrix< Matrix, Communicator >:: -getTypeVirtual() const -{ - return getType(); -} - - /* * Some common Matrix methods follow below. */ diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index 0886d686c3947c5d57d022e46beb21cb9ca01f3d..b2abd13c537dc181de638caec4b6adf06755b2bf 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -27,7 +27,7 @@ // operations #include <type_traits> // std::add_const #include <TNL/Atomic.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Pointers/DevicePointer.h> namespace TNL { @@ -105,13 +105,13 @@ public: local_span[1].fetch_min( i ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), - kernel, - &localMatrixPointer.template getData< DeviceType >(), - span_starts.getData(), - span_ends.getData(), - local_span.getData() - ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), + kernel, + &localMatrixPointer.template getData< DeviceType >(), + span_starts.getData(), + span_ends.getData(), + local_span.getData() + ); // set the local-only span (optimization for banded matrices) localOnlySpan.first = local_span.getElement( 0 ); @@ -192,8 +192,8 @@ public: { outVectorView[ i ] = 
localMatrix->rowVectorProduct( i, globalBufferView ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localMatrix.getRows(), kernel, + &localMatrixPointer.template getData< DeviceType >() ); } // optimization for banded matrices else { @@ -206,8 +206,8 @@ public: { outVectorView[ i ] = localMatrix->rowVectorProduct( i, inView ); }; - ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.first, localOnlySpan.second, kernel1, + &localMatrixPointer.template getData< DeviceType >() ); // wait for all communications to finish CommunicatorType::WaitAll( &commRequests[0], commRequests.size() ); @@ -217,10 +217,10 @@ public: { outVectorView[ i ] = localMatrix->rowVectorProduct( i, globalBufferView ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2, - &localMatrixPointer.template getData< DeviceType >() ); - ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2, - &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, localOnlySpan.first, kernel2, + &localMatrixPointer.template getData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( localOnlySpan.second, localMatrix.getRows(), kernel2, + &localMatrixPointer.template getData< DeviceType >() ); } } diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index e2479fd54349a93424a1d2e67a6fc42a1d0a6eff..6536f5f6ca6ffa7869851e2ad0883c51de83ed28 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -39,17 +39,16 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView 
ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef Ellpack< Real, Devices::Host, Index > HostType; - typedef Ellpack< Real, Devices::Cuda, Index > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; - Ellpack(); - - static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Ellpack< _Real, _Device, _Index >; - String getTypeVirtual() const; + Ellpack(); static String getSerializationType(); diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/EllpackSymmetric.h index 323772551aac67668cca60ea69fa6651f24dbe1c..d92fc77ee25f203adad1470de46d17552047a290 100644 --- a/src/TNL/Matrices/EllpackSymmetric.h +++ b/src/TNL/Matrices/EllpackSymmetric.h @@ -31,16 +31,14 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef EllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef EllpackSymmetric< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = EllpackSymmetric< _Real, _Device, _Index >; EllpackSymmetric(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h index 
4c56a8716b5b9ba1612a52750063a42120223682..03e3298557171cd2faaed57b698819af0c87b7d2 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph.h @@ -31,16 +31,14 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef EllpackSymmetricGraph< Real, Devices::Host, Index > HostType; - typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = EllpackSymmetricGraph< _Real, _Device, _Index >; EllpackSymmetricGraph(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index 6304d5f9dbc4753b3f545919c8039b182928251a..b949292c5f1664562525a4ead8ca17b2ad9f343b 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -42,26 +42,6 @@ Index EllpackSymmetricGraph< Real, Device, Index >::getAlignedRows() const return this->alignedRows; } -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetricGraph< Real, Device, Index > :: getType() -{ - return String( "EllpackSymmetricGraph< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetricGraph< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > 
@@ -74,7 +54,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp this->rows = rows; this->columns = columns; if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) allocateElements(); @@ -937,7 +917,7 @@ void EllpackSymmetricGraphVectorProductCuda( const EllpackSymmetricGraph< Real, const int gridIdx, const int color ) { - int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, globalIdx, color ); } #endif @@ -986,19 +966,19 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef EllpackSymmetricGraph< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ ) { IndexType rows = matrix.getRowsOfColor( color ); const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx 
< cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); EllpackSymmetricGraphVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -1009,9 +989,9 @@ class EllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > } } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index 7207afc568fffb86687edd45cb183170a57a1775..90369f77af0f0085b140934c27fe3fe5a2d8f015 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -26,26 +26,6 @@ EllpackSymmetric< Real, Device, Index > :: EllpackSymmetric() { }; -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetric< Real, Device, Index > :: getType() -{ - return String( "EllpackSymmetric< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index > -String EllpackSymmetric< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > @@ -58,7 +38,7 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row this->rows = rows; this->columns = columns; if( std::is_same< DeviceType, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) 
allocateElements(); @@ -728,7 +708,7 @@ void EllpackSymmetricVectorProductCuda( const EllpackSymmetric< Real, Devices::C OutVector* outVector, const int gridIdx ) { - int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( globalIdx >= matrix->getRows() ) return; matrix->spmvCuda( *inVector, *outVector, globalIdx ); @@ -780,16 +760,16 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef EllpackSymmetric< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); EllpackSymmetricVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> @@ -798,9 +778,9 @@ class EllpackSymmetricDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - 
Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index b4e453793fd2bc7d3c2e2f45182d54faf667c4fa..5ac812cf2101e7f13bafbfd871ac168429be49cd 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -29,33 +29,17 @@ Ellpack< Real, Device, Index > :: Ellpack() template< typename Real, typename Device, typename Index > -String Ellpack< Real, Device, Index > :: getType() +String Ellpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::Ellpack< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device :: getDeviceType() + + getType< Device >() + String( ", " ) + - String( TNL::getType< Index >() ) + + getType< Index >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String Ellpack< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Ellpack< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index > @@ -76,7 +60,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows, this->rows = rows; this->columns = columns; if( std::is_same< Device, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( rows, Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( rows, Cuda::getWarpSize() ); else this->alignedRows = rows; if( this->rowLengths != 0 ) allocateElements(); @@ -130,7 +114,7 @@ template< typename Real, Index Ellpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const { 
ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); } template< typename Real, @@ -144,7 +128,7 @@ void Ellpack< Real, Device, Index >::setLike( const Ellpack< Real2, Device2, Ind Sparse< Real, Device, Index >::setLike( matrix ); this->rowLengths = matrix.rowLengths; if( std::is_same< Device, Devices::Cuda >::value ) - this->alignedRows = roundToMultiple( this->getRows(), Devices::Cuda::getWarpSize() ); + this->alignedRows = roundToMultiple( this->getRows(), Cuda::getWarpSize() ); else this->alignedRows = this->getRows(); } @@ -664,8 +648,8 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2 // host -> cuda if( std::is_same< Device, Devices::Cuda >::value ) { - typename ValuesVector::HostType tmpValues; - typename ColumnIndexesVector::HostType tmpColumnIndexes; + typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues; + typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes; tmpValues.setLike( this->values ); tmpColumnIndexes.setLike( this->columnIndexes ); @@ -852,7 +836,7 @@ __global__ void EllpackVectorProductCudaKernel( Real multiplicator, const Index gridIdx ) { - const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= rows ) return; Index i = rowIdx; @@ -918,16 +902,16 @@ class EllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef Ellpack< Real, Device, Index > Matrix; typedef typename Matrix::IndexType IndexType; - //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - //OutVector* kernel_outVector = 
Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + //Matrix* kernel_this = Cuda::passToDevice( matrix ); + //InVector* kernel_inVector = Cuda::passToDevice( inVector ); + //OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); EllpackVectorProductCudaKernel < Real, Index > <<< cudaGridSize, cudaBlockSize >>> @@ -944,9 +928,9 @@ class EllpackDeviceDependentCode< Devices::Cuda > gridIdx ); TNL_CHECK_CUDA_DEVICE; } - //Devices::Cuda::freeFromDevice( kernel_this ); - //Devices::Cuda::freeFromDevice( kernel_inVector ); - //Devices::Cuda::freeFromDevice( kernel_outVector ); + //Cuda::freeFromDevice( kernel_this ); + //Cuda::freeFromDevice( kernel_inVector ); + //Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; cudaDeviceSynchronize(); #endif diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h index 07991a573662c7380d6fd2814b0a20db30e7dca8..354b0a9e19d89ddb3cb457ce578752c8f1f513b0 100644 --- a/src/TNL/Matrices/MatrixOperations.h +++ b/src/TNL/Matrices/MatrixOperations.h @@ -21,6 +21,8 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Math.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Matrices { @@ -248,7 +250,7 @@ GemvCudaKernel( const IndexType m, IndexType elementIdx = blockIdx.x * blockDim.x + threadIdx.x; const IndexType 
gridSize = blockDim.x * gridDim.x; - RealType* shx = Devices::Cuda::getSharedMemory< RealType >(); + RealType* shx = Cuda::getSharedMemory< RealType >(); if( threadIdx.x < n ) shx[ threadIdx.x ] = alpha * x[ threadIdx.x ]; @@ -341,13 +343,13 @@ public: // TODO: use static storage, e.g. from the CudaReductionBuffer, to avoid frequent reallocations Containers::Vector< RealType, Devices::Cuda, IndexType > xDevice; xDevice.setSize( n ); - Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); + Algorithms::MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< RealType, RealType, IndexType >( xDevice.getData(), x, n ); // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); dim3 blockSize, gridSize; blockSize.x = 256; - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) ); GemvCudaKernel<<< gridSize, blockSize, n * sizeof( RealType ) >>>( m, n, @@ -401,9 +403,9 @@ public: blockSize.x /= 2; // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( m, blockSize.x ) ); - gridSize.y = Devices::Cuda::getNumberOfBlocks( n, blockSize.y ); + const int desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( m, blockSize.x ) ); + gridSize.y = Cuda::getNumberOfBlocks( n, blockSize.y ); GeamCudaKernel<<< gridSize, blockSize >>>( m, n, 
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index eacf8911a149faed530f3fd6138051dfb2795071..418e6f5b3eda29a659c4487dfaf34d88c12fea1d 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -425,11 +425,11 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > bool verbose, bool symReader ) { - typedef typename Matrix::HostType HostMatrixType; - typedef typename HostMatrixType::CompressedRowLengthsVector CompressedRowLengthsVector; + using HostMatrixType = typename Matrix::template Self< typename Matrix::RealType, Devices::Sequential >; + using CompressedRowLengthsVector = typename HostMatrixType::CompressedRowLengthsVector; HostMatrixType hostMatrix; - typename Matrix::CompressedRowLengthsVector rowLengths; + CompressedRowLengthsVector rowLengths; return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); matrix = hostMatrix; diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 7472760c23ec6d8df8a4295457cd6773e4ac80df..33c4d2e654cb32f9ba56516a1678b73d17ee3b96 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -12,6 +12,9 @@ #include <TNL/Matrices/Matrix.h> #include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Cuda/MemoryHelpers.h> +#include <TNL/Cuda/SharedMemory.h> namespace TNL { namespace Matrices { @@ -240,7 +243,7 @@ __global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, int gridIdx ) { static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < matrix->getRows() ) ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); } @@ 
-255,16 +258,16 @@ void MatrixVectorProductCuda( const Matrix& matrix, { #ifdef HAVE_CUDA typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_this, kernel_inVector, @@ -272,9 +275,9 @@ void MatrixVectorProductCuda( const Matrix& matrix, gridIdx ); TNL_CHECK_CUDA_DEVICE; } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 0496a25a343f336690b7b0d00e699e209aabcd28..1ee6a25e9af4fbf8d8f28461e6658305e2b0151f 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -38,18 +38,16 @@ public: typedef Index IndexType; typedef typename Matrix< 
Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Multidiagonal< Real, Devices::Host, Index > HostType; - typedef Multidiagonal< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef MultidiagonalRow< Real, Index > MatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Multidiagonal< _Real, _Device, _Index >; Multidiagonal(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h index 065e7780dcde13ea02404de3a5a42447ca4d4ae4..ff1ac384a3a1a95a170f491de8a56dae09651b3c 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Multidiagonal_impl.h @@ -31,31 +31,15 @@ Multidiagonal< Real, Device, Index > :: Multidiagonal() template< typename Real, typename Device, typename Index > -String Multidiagonal< Real, Device, Index > :: getType() +String Multidiagonal< Real, Device, Index >::getSerializationType() { return String( "Matrices::Multidiagonal< ") + - String( TNL::getType< Real >() ) + + getType< Real >() + String( ", " ) + - Device :: getDeviceType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index > -String Multidiagonal< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Multidiagonal< Real, Device, Index >::getSerializationType() -{ - return getType(); -} - template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h index 
8503f6180a27b7e3f31c2555949611da70fc9b3a..5051fc21868b13a5644ebbdb190371bc50c77224 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/SlicedEllpack.h @@ -68,19 +68,18 @@ public: typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpack< Real, Devices::Host, Index, SliceSize > HostType; - typedef SlicedEllpack< Real, Devices::Cuda, Index, SliceSize > CudaType; typedef Sparse< Real, Device, Index > BaseType; typedef typename BaseType::MatrixRow MatrixRow; typedef SparseRow< const RealType, const IndexType > ConstMatrixRow; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpack< _Real, _Device, _Index, _SliceSize >; SlicedEllpack(); - static String getType(); - - String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/SlicedEllpackSymmetric.h index 9e7694de47649259e597f13582c6b77fbc6a47ae..835eccf83d43292f75c2e918c7e7ccd3b5d37aaa 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric.h @@ -49,16 +49,15 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpackSymmetric< Real, Devices::Host, Index > HostType; - typedef SlicedEllpackSymmetric< 
Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpackSymmetric< _Real, _Device, _Index, _SliceSize >; SlicedEllpackSymmetric(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h index 12019b79d6f2acd5c95c53d2dda4abc7da655a61..5fed4082b885093cf7e1a67b1025e73179895471 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h @@ -49,16 +49,15 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; - typedef SlicedEllpackSymmetricGraph< Real, Devices::Host, Index > HostType; - typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index > CudaType; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + int _SliceSize = SliceSize > + using Self = SlicedEllpackSymmetricGraph< _Real, _Device, _Index, _SliceSize >; SlicedEllpackSymmetricGraph(); - static String getType(); - - String getTypeVirtual() const; - void setDimensions( const IndexType rows, const IndexType columns ); diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index 866211d53b6af573fe4196a8b4767508ead51858..bfe73f231092a0e4ea90c3011b823c6ab8c17d95 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -25,28 +25,6 @@ 
template< typename Real, SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::SlicedEllpackSymmetricGraph() : rearranged( false ) { -}; - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getType() -{ - return String( "SlicedEllpackSymmetricGraph< ") + - String( TNL::getType< Real >() ) + - String( ", " ) + - Device::getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); } template< typename Real, @@ -1117,7 +1095,7 @@ __global__ void SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_Cuda typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVector rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -1174,7 +1152,7 @@ void SlicedEllpackSymmetricGraphVectorProductCuda( const SlicedEllpackSymmetricG const int color, const int sliceOffset ) { - int globalIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset; + int globalIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x + sliceOffset; matrix->smvCuda( *inVector, *outVector, globalIdx, color ); } #endif @@ -1235,21 +1213,21 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = 
Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -1267,10 +1245,10 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); for( IndexType color = 0; color < matrix.getNumberOfColors(); color++ ) { IndexType offset = matrix.colorPointers.getElement( color ); //can be 
computed in kernel @@ -1280,11 +1258,11 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > //IndexType rows = matrix.colorPointers.getElement( color + 1 ) - matrix.colorPointers.getElement( color ) + inSliceIdx; // TODO: rows id undefined /*const IndexType cudaBlocks = roundUpDivision( rows, cudaBlockSize.x ); - const IndexType cudaGrids = rondUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize ); + const IndexType cudaGrids = rondUpDivision( cudaBlocks, Cuda::getMaxGridSize ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); // TODO: this cannot be used here and i is undefined //IndexType offset = this->colorPointers[ i ]; IndexType inSliceIdx = offset % SliceSize; @@ -1299,9 +1277,9 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > sliceOffset ); }*/ } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index c9dee062c9feb64a4164618b84474b8d89a6dcab..c403fd4c84f09a59883f14f2fc5c23e79c1c65cb 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -24,28 +24,6 @@ template< typename Real, int SliceSize > SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::SlicedEllpackSymmetric() { -}; - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getType() -{ - return String( "SlicedEllpackSymmetric< ") + - String( TNL::getType< Real >() ) + - 
String( ", " ) + - Device :: getDeviceType() + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); } template< typename Real, @@ -80,7 +58,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } @@ -534,7 +512,7 @@ const SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >* matrix, OutVector* outVector, int gridIdx ) { - int rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + int rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; matrix->spmvCuda( *inVector, *outVector, rowIdx ); } #endif @@ -806,7 +784,7 @@ __global__ void SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKerne typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -865,21 +843,21 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index 
numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif } @@ -896,16 +874,16 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + Matrix* kernel_this = Cuda::passToDevice( matrix ); + InVector* kernel_inVector = Cuda::passToDevice( inVector ); + OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType 
gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackSymmetricVectorProductCudaKernel< Real, Index, SliceSize, InVector, OutVector > <<< cudaGridSize, cudaBlockSize >>> ( kernel_this, @@ -913,9 +891,9 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > kernel_outVector, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inVector ); - Devices::Cuda::freeFromDevice( kernel_outVector ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inVector ); + Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 016edf6996e19fb5fa3cfedf65364ebc22fb53fd..45e8cdee77fbda670d2e3b23a3844ad0bb53d071 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -24,39 +24,21 @@ template< typename Real, int SliceSize > SlicedEllpack< Real, Device, Index, SliceSize >::SlicedEllpack() { -}; +} template< typename Real, typename Device, typename Index, int SliceSize > -String SlicedEllpack< Real, Device, Index, SliceSize >::getType() +String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType() { return String( "Matrices::SlicedEllpack< ") + - String( TNL::getType< Real >() ) + + TNL::getType< Real >() + String( ", " ) + - Device :: getDeviceType() + + getType< Device >() + String( " >" ); } -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpack< Real, Device, Index, SliceSize >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index, - int SliceSize > -String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType() -{ - return getType(); -} - template< 
typename Real, typename Device, typename Index, @@ -97,7 +79,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->maxRowLength = max( rowLengths ); - this->slicePointers.template prefixSum< Containers::Algorithms::ScanType::Exclusive >(); + this->slicePointers.template scan< Algorithms::ScanType::Exclusive >(); this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } @@ -129,7 +111,7 @@ template< typename Real, Index SlicedEllpack< Real, Device, Index, SliceSize >::getNonZeroRowLength( const IndexType row ) const { ConstMatrixRow matrixRow = getRow( row ); - return matrixRow.getNonZeroElementsCount( Device::getDeviceType() ); + return matrixRow.getNonZeroElementsCount( getType< Device >() ); } template< typename Real, @@ -638,19 +620,14 @@ template< typename Real, SlicedEllpack< Real, Device, Index, SliceSize >& SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< Real2, Device2, Index2, SliceSize >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value || std::is_same< Device, Devices::MIC >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value || std::is_same< Device2, Devices::MIC >::value, - "unknown device" ); - this->setLike( matrix ); this->slicePointers = matrix.slicePointers; this->sliceCompressedRowLengths = matrix.sliceCompressedRowLengths; // host -> cuda if( std::is_same< Device, Devices::Cuda >::value ) { - typename ValuesVector::HostType tmpValues; - typename ColumnIndexesVector::HostType tmpColumnIndexes; + typename ValuesVector::template Self< typename ValuesVector::ValueType, Devices::Sequential > tmpValues; + typename ColumnIndexesVector::template Self< typename ColumnIndexesVector::ValueType, Devices::Sequential > tmpColumnIndexes; tmpValues.setLike( matrix.values ); tmpColumnIndexes.setLike( 
matrix.columnIndexes ); @@ -672,7 +649,7 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< } // cuda -> host - if( std::is_same< Device, Devices::Host >::value ) { + else { ValuesVector tmpValues; ColumnIndexesVector tmpColumnIndexes; tmpValues.setLike( matrix.values ); @@ -693,10 +670,6 @@ SlicedEllpack< Real, Device, Index, SliceSize >::operator=( const SlicedEllpack< } } } - - if( std::is_same< Device, Devices::MIC >::value ) { - throw Exceptions::NotImplementedError("Cross-device assignment for the SlicedEllpack format is not implemented for MIC."); - } return *this; } @@ -746,7 +719,7 @@ template< typename Real, int SliceSize > void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) const { - if( std::is_same< Device, Devices::Host >::value ) { + if( ! std::is_same< Device, Devices::Cuda >::value ) { for( IndexType row = 0; row < this->getRows(); row++ ) { str <<"Row: " << row << " -> "; @@ -767,7 +740,7 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::print( std::ostream& str ) } } else { - HostType hostMatrix; + Self< Real, Devices::Sequential > hostMatrix; hostMatrix = *this; hostMatrix.print( str ); } @@ -800,12 +773,13 @@ __device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalR } #endif -template<> -class SlicedEllpackDeviceDependentCode< Devices::Host > +// implementation for host types +template< typename Device_ > +class SlicedEllpackDeviceDependentCode { public: - typedef Devices::Host Device; + typedef Device_ Device; template< typename Real, typename Index, @@ -898,7 +872,7 @@ __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( Sliced typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ) { - const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; + const Index sliceIdx = gridIdx * Cuda::getMaxGridSize() * 
blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -921,7 +895,7 @@ __global__ void SlicedEllpackVectorProductCudaKernel( Real multiplicator, const Index gridIdx ) { - const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx >= rows ) return; const Index sliceIdx = rowIdx / SliceSize; @@ -997,21 +971,21 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; - Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); + Matrix* kernel_matrix = Cuda::passToDevice( matrix ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); - const Index cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const Index cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, rowLengths, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_matrix ); + Cuda::freeFromDevice( kernel_matrix ); TNL_CHECK_CUDA_DEVICE; #endif return true; @@ -1031,16 +1005,16 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > #ifdef HAVE_CUDA typedef SlicedEllpack< Real, Device, 
Index, SliceSize > Matrix; typedef typename Matrix::IndexType IndexType; - //Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); - //InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); - //OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + //Matrix* kernel_this = Cuda::passToDevice( matrix ); + //InVector* kernel_inVector = Cuda::passToDevice( inVector ); + //OutVector* kernel_outVector = Cuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); SlicedEllpackVectorProductCudaKernel < Real, Index, SliceSize > <<< cudaGridSize, cudaBlockSize >>> @@ -1057,68 +1031,13 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > gridIdx ); TNL_CHECK_CUDA_DEVICE; } - //Devices::Cuda::freeFromDevice( kernel_this ); - //Devices::Cuda::freeFromDevice( kernel_inVector ); - //Devices::Cuda::freeFromDevice( kernel_outVector ); + //Cuda::freeFromDevice( kernel_this ); + //Cuda::freeFromDevice( kernel_inVector ); + //Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; cudaDeviceSynchronize(); #endif } - -}; - -template<> -class SlicedEllpackDeviceDependentCode< Devices::MIC > -{ - public: - - typedef Devices::MIC Device; - - template< typename Real, - typename Index, - int SliceSize > - static void initRowTraverse( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const Index row, - Index& rowBegin, - Index& rowEnd, - 
Index& step ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverse"); - } - - template< typename Real, - typename Index, - int SliceSize > - __cuda_callable__ - static void initRowTraverseFast( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const Index row, - Index& rowBegin, - Index& rowEnd, - Index& step ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::initRowTraverseFast"); - } - - template< typename Real, - typename Index, - int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::computeMaximalRowLengthInSlices"); - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector, - int SliceSize > - static void vectorProduct( const SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - throw Exceptions::NotImplementedError("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::vectorProduct"); - } }; } // namespace Matrices diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index ccc8930f9bb99825d89de1f7df6de5ce31fa427d..ff507c3268ce059108bd217e207c9c6487cb30c5 100644 --- a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -17,7 +17,7 @@ #include <algorithm> #include <TNL/Pointers/DevicePointer.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Matrices { @@ -130,8 +130,8 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) #ifdef HAVE_CUDA dim3 blockSize( 256 ); dim3 gridSize; - const 
IndexType desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( rows, blockSize.x ) ); + const IndexType desGridSize = 32 * Cuda::DeviceInfo::getCudaMultiprocessors( Cuda::DeviceInfo::getActiveDevice() ); + gridSize.x = min( desGridSize, Cuda::getNumberOfBlocks( rows, blockSize.x ) ); typename Matrix1::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); @@ -140,7 +140,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) const Pointers::DevicePointer< const Matrix2 > Bpointer( B ); // set row lengths - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); SparseMatrixSetRowLengthsVectorKernel<<< gridSize, blockSize >>>( rowLengths.getData(), &Bpointer.template getData< TNL::Devices::Cuda >(), @@ -150,7 +150,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) Apointer->setCompressedRowLengths( rowLengths ); // copy rows - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); SparseMatrixCopyKernel<<< gridSize, blockSize >>>( &Apointer.template modifyData< TNL::Devices::Cuda >(), &Bpointer.template getData< TNL::Devices::Cuda >(), @@ -170,7 +170,8 @@ typename std::enable_if< ! std::is_same< typename Matrix1::DeviceType, typename std::is_same< typename Matrix2::DeviceType, Devices::Host >::value >::type copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) { - typename Matrix2::CudaType B_tmp; + using CudaMatrix2 = typename Matrix2::template Self< typename Matrix2::RealType, Devices::Cuda >; + CudaMatrix2 B_tmp; B_tmp = B; copySparseMatrix_impl( A, B_tmp ); } @@ -182,7 +183,8 @@ typename std::enable_if< ! 
std::is_same< typename Matrix1::DeviceType, typename std::is_same< typename Matrix2::DeviceType, Devices::Cuda >::value >::type copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) { - typename Matrix1::CudaType A_tmp; + using CudaMatrix1 = typename Matrix1::template Self< typename Matrix1::RealType, Devices::Cuda >; + CudaMatrix1 A_tmp; copySparseMatrix_impl( A_tmp, B ); A = A_tmp; } @@ -353,11 +355,11 @@ reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ) dest[ i ] = src[ perm[ i ] ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), - kernel, - src.getData(), - dest.getData(), - perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), + kernel, + src.getData(), + dest.getData(), + perm.getData() ); } } // namespace Matrices diff --git a/src/TNL/Matrices/SparseRow.h b/src/TNL/Matrices/SparseRow.h index c7ebd07039061fdb775a583ac065e0a71c5f4869..f66cd2ceaf1c6f0cd882bb962a78c6649816aa75 100644 --- a/src/TNL/Matrices/SparseRow.h +++ b/src/TNL/Matrices/SparseRow.h @@ -14,7 +14,7 @@ #include <type_traits> #include <ostream> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { namespace Matrices { diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 7f58bd9c492fa59c454b2226f301c83b89e74dbf..3f57fe1c3e6de1cf0e608cd68b5846eb711e321d 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -40,16 +40,15 @@ public: typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Tridiagonal< Real, Devices::Host, Index > HostType; - typedef Tridiagonal< Real, Devices::Cuda, Index > CudaType; typedef Matrix< Real, Device, Index > BaseType; typedef TridiagonalRow< Real, Index > MatrixRow; - Tridiagonal(); - - 
static String getType(); + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; - String getTypeVirtual() const; + Tridiagonal(); static String getSerializationType(); diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index 9a2d5e4a87c4b505f186bb59a95280eecd5f1bf2..62575f1776144e374b560a65e213248a1177de80 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -27,31 +27,15 @@ Tridiagonal< Real, Device, Index >::Tridiagonal() { } -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() -{ - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Tridiagonal< Real, Device, Index >::getSerializationType() { - return getType(); + return String( "Matrices::Tridiagonal< " ) + + getType< RealType >() + ", " + + getType< Device >() + ", " + + getType< IndexType >() + " >"; } template< typename Real, @@ -468,7 +452,7 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De const Real matrixMultiplicator, const Index gridIdx ) { - const Index rowIdx = ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( rowIdx < inMatrix->getRows() ) { if( rowIdx > 0 ) @@ -510,24 +494,24 @@ void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Re if( std::is_same< Device, Devices::Cuda >::value ) { #ifdef HAVE_CUDA - 
Tridiagonal* kernel_this = Devices::Cuda::passToDevice( *this ); + Tridiagonal* kernel_this = Cuda::passToDevice( *this ); typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; - InMatrixType* kernel_inMatrix = Devices::Cuda::passToDevice( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Devices::Cuda::getMaxGridSize() ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> ( kernel_inMatrix, kernel_this, matrixMultiplicator, gridIdx ); } - Devices::Cuda::freeFromDevice( kernel_this ); - Devices::Cuda::freeFromDevice( kernel_inMatrix ); + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Meshes/DefaultConfig.h b/src/TNL/Meshes/DefaultConfig.h index 5e8a7cbef85fc2a03681553f7348af5c95f45dbb..36635647477ff7b3d70a3dd0d92b836cea6da12b 100644 --- a/src/TNL/Meshes/DefaultConfig.h +++ b/src/TNL/Meshes/DefaultConfig.h @@ -17,7 +17,7 @@ #pragma once #include <TNL/String.h> -#include <TNL/param-types.h> +#include <TNL/TypeInfo.h> #include <TNL/Meshes/Topologies/SubentityVertexMap.h> namespace TNL { @@ -46,17 +46,6 @@ struct DefaultConfig static constexpr int worldDimension = WorldDimension; static constexpr int meshDimension = Cell::dimension; - static String getType() - { - return String( "Meshes::DefaultConfig< " ) + - Cell::getType() + ", " + - convertToString( 
WorldDimension ) + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< GlobalIndex >() + ", " + - TNL::getType< LocalIndex >() + ", " + - TNL::getType< Id >() + " >"; - }; - /**** * Storage of mesh entities. */ diff --git a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h index d0461ddedbe1b0aa1f3ab6d3cfa8a5e57c3908a4..6030b976f038ab290ada814575db1bfb444ce694 100644 --- a/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h +++ b/src/TNL/Meshes/DistributedMeshes/BufferEntitiesHelper.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Containers/StaticVector.h> #include <TNL/Communicators/MPIPrint.h> @@ -67,7 +67,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 1, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ j ]; } }; - ParallelFor< Device >::exec( 0, sizex, kernel ); + Algorithms::ParallelFor< Device >::exec( 0, sizex, kernel ); }; }; @@ -115,7 +115,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 2, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ j * sizex + i ]; } }; - ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel ); + Algorithms::ParallelFor2D< Device >::exec( 0, 0, sizex, sizey, kernel ); }; }; @@ -164,7 +164,7 @@ class BufferEntitiesHelper< MeshFunctionType, MaskPointer, 3, RealType, Device, meshFunctionData[ entity.getIndex() ] = buffer[ k * sizex * sizey + j * sizex + i ]; } }; - ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel ); + Algorithms::ParallelFor3D< Device >::exec( 0, 0, 0, sizex, sizey, sizez, kernel ); }; }; diff --git a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h index 20efa02599ef6808ad1516f8409bab08648aca47..ec30a4f470f96d855edccbe3d67e4a6542bc657a 100644 --- a/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h +++ 
b/src/TNL/Meshes/DistributedMeshes/CopyEntitiesHelper.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Meshes { @@ -45,7 +45,7 @@ class CopyEntitiesHelper<MeshFunctionType, 1> fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel ); + Algorithms::ParallelFor< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0, (Index)size.x(), kernel ); } @@ -79,7 +79,7 @@ class CopyEntitiesHelper<MeshFunctionType,2> fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel ); + Algorithms::ParallelFor2D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)size.x(), (Index)size.y(), kernel ); } }; @@ -113,7 +113,7 @@ class CopyEntitiesHelper<MeshFunctionType,3> fromEntity.refresh(); toData[toEntity.getIndex()]=fromData[fromEntity.getIndex()]; }; - ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel ); + Algorithms::ParallelFor3D< typename MeshFunctionType::MeshType::DeviceType >::exec( (Index)0,(Index)0,(Index)0,(Index)size.x(),(Index)size.y(), (Index)size.z(), kernel ); } }; diff --git a/src/TNL/Meshes/Geometry/getEntityCenter.h b/src/TNL/Meshes/Geometry/getEntityCenter.h index 59cd950ca180cdbf8095382536544e9fe0ffde51..a37c27acf00523341382a7f44c5ef74adfd14d45 100644 --- a/src/TNL/Meshes/Geometry/getEntityCenter.h +++ b/src/TNL/Meshes/Geometry/getEntityCenter.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Meshes/GridEntity.h> #include <TNL/Meshes/Mesh.h> #include <TNL/Meshes/MeshEntity.h> 
diff --git a/src/TNL/Meshes/Geometry/getEntityMeasure.h b/src/TNL/Meshes/Geometry/getEntityMeasure.h index 7402e4f6db3226b534caf6753958fc10e4efdb1a..a3381ed96b1e72ddc2f0ccf25bbbdccd7e71739a 100644 --- a/src/TNL/Meshes/Geometry/getEntityMeasure.h +++ b/src/TNL/Meshes/Geometry/getEntityMeasure.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Meshes/GridEntity.h> #include <TNL/Meshes/Mesh.h> #include <TNL/Meshes/MeshEntity.h> diff --git a/src/TNL/Meshes/GridDetails/Grid1D.h b/src/TNL/Meshes/GridDetails/Grid1D.h index 53b748c4e6a7893d8fb645ffdcc271c80c8c5b58..81811fe9005c44bb1239f06fe423f4d566c7c77c 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D.h +++ b/src/TNL/Meshes/GridDetails/Grid1D.h @@ -34,8 +34,6 @@ class Grid< 1, Real, Device, Index > : public Object typedef Index GlobalIndexType; typedef Containers::StaticVector< 1, Real > PointType; typedef Containers::StaticVector< 1, Index > CoordinatesType; - typedef Grid< 1, Real, Devices::Host, Index > HostType; - typedef Grid< 1, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType; @@ -65,16 +63,6 @@ class Grid< 1, Real, Device, Index > : public Object // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - /** - * \brief Returns type of grid Real (value), Device type and the type of Index. - */ - static String getType(); - - /** - * \brief Returns type of grid Real (value), Device type and the type of Index. - */ - String getTypeVirtual() const; - /** * \brief Returns (host) type of grid Real (value), Device type and the type of Index. 
*/ diff --git a/src/TNL/Meshes/GridDetails/Grid1D_impl.h b/src/TNL/Meshes/GridDetails/Grid1D_impl.h index a747544df981bec045d710cd18220f96832487d3..279ec9810184ea097212e45984183ad1ddc97e70 100644 --- a/src/TNL/Meshes/GridDetails/Grid1D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid1D_impl.h @@ -44,32 +44,16 @@ Grid< 1, Real, Device, Index >::Grid( const Index xSize ) this->setDimensions( xSize ); } -template< typename Real, - typename Device, - typename Index > -String Grid< 1, Real, Device, Index >::getType() -{ - return String( "Meshes::Grid< " ) + - convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device::getDeviceType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 1, Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - template< typename Real, typename Device, typename Index > String Grid< 1, Real, Device, Index >::getSerializationType() { - return HostType::getType(); + return String( "Meshes::Grid< " ) + + convertToString( getMeshDimension() ) + ", " + + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/GridDetails/Grid2D.h b/src/TNL/Meshes/GridDetails/Grid2D.h index 61f3c11c0e0684c892a02971bfc5a2d2df67979c..b24be9ba29503f4f6b80f2c3ad0ada1097d89844 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D.h +++ b/src/TNL/Meshes/GridDetails/Grid2D.h @@ -34,8 +34,6 @@ class Grid< 2, Real, Device, Index > : public Object typedef Index GlobalIndexType; typedef Containers::StaticVector< 2, Real > PointType; typedef Containers::StaticVector< 2, Index > CoordinatesType; - typedef Grid< 2, Real, Devices::Host, Index > HostType; - typedef Grid< 2, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType; @@ -57,21 +55,11 @@ class Grid< 2, Real, 
Device, Index > : public Object */ Grid(); - /** - * \brief See Grid1D::getType(). - */ Grid( const Index xSize, const Index ySize ); // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - static String getType(); - - /** - * \brief See Grid1D::getTypeVirtual(). - */ - String getTypeVirtual() const; - /** * \brief See Grid1D::getSerializationType(). */ diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h index 7b673e0a819d85c21426c2e3a7a09f4dcceb4f48..259181688c0566abdd3025bac1cbda1429a60d10 100644 --- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h @@ -54,29 +54,13 @@ Grid< 2, Real, Device, Index >::Grid( const Index xSize, const Index ySize ) template< typename Real, typename Device, typename Index > -String Grid< 2, Real, Device, Index > :: getType() +String Grid< 2, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 2, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 2, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/GridDetails/Grid3D.h b/src/TNL/Meshes/GridDetails/Grid3D.h index 67c752cb13700628abcac76de2f066473baf26a7..881fb0074bf642ca6fdcd1300df849c93d205792 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D.h +++ b/src/TNL/Meshes/GridDetails/Grid3D.h @@ -33,8 +33,6 @@ class Grid< 3, Real, Device, Index > : public Object typedef Index GlobalIndexType; 
typedef Containers::StaticVector< 3, Real > PointType; typedef Containers::StaticVector< 3, Index > CoordinatesType; - typedef Grid< 3, Real, Devices::Host, Index > HostType; - typedef Grid< 3, Real, Devices::Cuda, Index > CudaType; typedef DistributedMeshes::DistributedMesh <Grid> DistributedMeshType; @@ -62,16 +60,6 @@ class Grid< 3, Real, Device, Index > : public Object // empty destructor is needed only to avoid crappy nvcc warnings ~Grid() {} - /** - * \brief See Grid1D::getType(). - */ - static String getType(); - - /** - * \brief See Grid1D::getTypeVirtual(). - */ - String getTypeVirtual() const; - /** * \brief See Grid1D::getSerializationType(). */ diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Grid3D_impl.h index dbd5fcf47e71fd6c2dbb0ba271eb1656773a9a80..f4707a8ce34654544f9d7455122ebc3a340d690b 100644 --- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h +++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h @@ -68,29 +68,13 @@ Grid< 3, Real, Device, Index >::Grid( const Index xSize, const Index ySize, cons template< typename Real, typename Device, typename Index > -String Grid< 3, Real, Device, Index > :: getType() +String Grid< 3, Real, Device, Index > :: getSerializationType() { return String( "Meshes::Grid< " ) + convertToString( getMeshDimension() ) + ", " + - String( TNL::getType< RealType >() ) + ", " + - String( Device :: getDeviceType() ) + ", " + - String( TNL::getType< IndexType >() ) + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 3, Real, Device, Index > :: getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Grid< 3, Real, Device, Index > :: getSerializationType() -{ - return HostType::getType(); + getType< RealType >() + ", " + + getType< Devices::Host >() + ", " + + getType< IndexType >() + " >"; }; template< typename Real, diff --git a/src/TNL/Meshes/GridDetails/GridTraverser.h 
b/src/TNL/Meshes/GridDetails/GridTraverser.h index fb6b34da12fb750c0ad74cc3ba05b086727adf01..e8702153fb03343f85badc160676b1c788eaa948 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser.h +++ b/src/TNL/Meshes/GridDetails/GridTraverser.h @@ -12,7 +12,6 @@ #include <TNL/Meshes/Grid.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/CudaStreamPool.h> namespace TNL { namespace Meshes { @@ -89,38 +88,6 @@ class GridTraverser< Meshes::Grid< 1, Real, Devices::Cuda, Index > > const int& stream = 0 ); }; -/**** - * 1D grid, Devices::MIC - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 1, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode = synchronousMode, - const int& stream = 0 ); -}; - - /**** * 2D grid, Devices::Host @@ -202,45 +169,6 @@ class GridTraverser< Meshes::Grid< 2, Real, Devices::Cuda, Index > > const GridEntityParameters&... 
gridEntityParameters ); }; -/**** - * 2D grid, Devices::MIC - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 2, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary = 1, - int YOrthogonalBoundary = 1, - typename... GridEntityParameters > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) - //GridTraverserMode mode = synchronousMode, - GridTraverserMode mode, - // const int& stream = 0, - const int& stream, - // gridEntityParameters are passed to GridEntity's constructor - // (i.e. orientation and basis for faces) - const GridEntityParameters&... gridEntityParameters ); -}; /**** * 3D grid, Devices::Host @@ -324,51 +252,9 @@ class GridTraverser< Meshes::Grid< 3, Real, Devices::Cuda, Index > > const GridEntityParameters&... 
gridEntityParameters ); }; -/**** - * 3D grid, Devices::Cuda - */ -template< typename Real, - typename Index > -class GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > > -{ - public: - - typedef Meshes::Grid< 3, Real, Devices::MIC, Index > GridType; - typedef Pointers::SharedPointer< GridType > GridPointer; - typedef Real RealType; - typedef Devices::MIC DeviceType; - typedef Index IndexType; - typedef typename GridType::CoordinatesType CoordinatesType; - - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary = 1, - int YOrthogonalBoundary = 1, - int ZOrthogonalBoundary = 1, - typename... GridEntityParameters > - static void - processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - // FIXME: hack around nvcc bug (error: default argument not at end of parameter list) - //GridTraverserMode mode = synchronousMode, - GridTraverserMode mode, - // const int& stream = 0, - const int& stream, - // gridEntityParameters are passed to GridEntity's constructor - // (i.e. orientation and basis for faces and edges) - const GridEntityParameters&... 
gridEntityParameters ); -}; - } // namespace Meshes } // namespace TNL #include <TNL/Meshes/GridDetails/GridTraverser_1D.hpp> #include <TNL/Meshes/GridDetails/GridTraverser_2D.hpp> #include <TNL/Meshes/GridDetails/GridTraverser_3D.hpp> - diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp index 59989bb2a14a85443c2f9616c583ab945b727116..c1aab9660d50ee8fe6917ae08c6bb869e333c056 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_1D.hpp @@ -14,10 +14,9 @@ #pragma once -#include <TNL/Devices/MIC.h> #include <TNL/Meshes/Grid.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/CudaStreamPool.h> +#include <TNL/Cuda/StreamPool.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Meshes/GridDetails/GridTraverser.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -121,7 +120,7 @@ GridTraverser1D( typedef Meshes::Grid< 1, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( coordinates <= end ) { GridEntity entity( *grid, coordinates ); @@ -183,10 +182,10 @@ processEntities( const int& stream ) { #ifdef HAVE_CUDA - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); if( processOnlyBoundaryEntities ) { dim3 cudaBlockSize( 2 ); @@ -201,7 +200,7 @@ processEntities( else { dim3 blockSize( 256 ), blocksCount, gridsCount; - Devices::Cuda::setupThreads( + Cuda::setupThreads( blockSize, blocksCount, gridsCount, @@ -210,7 +209,7 @@ processEntities( for( gridIdx.x = 0; gridIdx.x < gridsCount.x; 
gridIdx.x++ ) { dim3 gridSize; - Devices::Cuda::setupGrid( + Cuda::setupGrid( blocksCount, gridsCount, gridIdx, @@ -226,8 +225,8 @@ processEntities( /*dim3 cudaBlockSize( 256 ); dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); + cudaBlocks.x = Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); + const IndexType cudaXGrids = Cuda::getNumberOfGrids( cudaBlocks.x ); for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > @@ -255,69 +254,5 @@ processEntities( #endif } -/**** - * 1D traverser, MIC - */ - -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities > -void -GridTraverser< Meshes::Grid< 1, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream ) -{ - throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <1, Real, Device::MIC>"); -/* - auto& pool = CudaStreamPool::getInstance(); - const cudaStream_t& s = pool.getStream( stream ); - - Devices::Cuda::synchronizeDevice(); - if( processOnlyBoundaryEntities ) - { - dim3 cudaBlockSize( 2 ); - dim3 cudaBlocks( 1 ); - GridBoundaryTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end ); - } - else - { - dim3 cudaBlockSize( 256 ); - dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - - for( IndexType gridXIdx = 0; gridXIdx < 
cudaXGrids; gridXIdx ++ ) - GridTraverser1D< Real, Index, GridEntity, UserData, EntitiesProcessor > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end, - gridXIdx ); - } - - // only launches into the stream 0 are synchronized - if( stream == 0 ) - { - cudaStreamSynchronize( s ); - TNL_CHECK_CUDA_DEVICE; - } -*/ -} - - } // namespace Meshes +} // namespace Meshes } // namespace TNL diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp index 50b30c0190bdda8c6c266385ecd785884f3282ac..721ec96d2331c103cb0179e5bd77b224b700c28f 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_2D.hpp @@ -10,10 +10,9 @@ #pragma once -#include <TNL/Devices/MIC.h> #include <TNL/Meshes/Grid.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/CudaStreamPool.h> +#include <TNL/Cuda/StreamPool.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Meshes/GridDetails/GridTraverser.h> @@ -149,8 +148,8 @@ GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates <= end ) { @@ -187,7 +186,7 @@ GridTraverser2DBoundaryAlongX( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); coordinates.y() = fixedY; if( coordinates.x() <= endX ) @@ -223,7 +222,7 @@ GridTraverser2DBoundaryAlongY( typename GridType::CoordinatesType 
coordinates; coordinates.x() = fixedX; - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx ); if( coordinates.y() <= endY ) { @@ -292,10 +291,10 @@ GridTraverser2DBoundary( /*const Index aux = max( entitiesAlongX, entitiesAlongY ); - const Index& warpSize = Devices::Cuda::getWarpSize(); + const Index& warpSize = Cuda::getWarpSize(); const Index threadsPerAxis = warpSize * ( aux / warpSize + ( aux % warpSize != 0 ) ); - Index threadId = Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + Index threadId = Cuda::getGlobalThreadIdx_x( gridIdx ); GridEntity entity( *grid, CoordinatesType( 0, 0 ), gridEntityParameters... ); @@ -415,19 +414,19 @@ processEntities( dim3 cudaBlockSize( 256 ); dim3 cudaBlocksCountAlongX, cudaGridsCountAlongX, cudaBlocksCountAlongY, cudaGridsCountAlongY; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongX, cudaGridsCountAlongX, end.x() - begin.x() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongY, cudaGridsCountAlongY, end.y() - begin.y() - 1 ); - auto& pool = CudaStreamPool::getInstance(); - Devices::Cuda::synchronizeDevice(); - + auto& pool = Cuda::StreamPool::getInstance(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); + const cudaStream_t& s1 = pool.getStream( stream ); const cudaStream_t& s2 = pool.getStream( stream + 1 ); dim3 gridIdx, cudaGridSize; for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); + Cuda::setupGrid( cudaBlocksCountAlongX, 
cudaGridsCountAlongX, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -451,7 +450,7 @@ processEntities( const cudaStream_t& s4 = pool.getStream( stream + 3 ); for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); + Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize ); GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -483,15 +482,15 @@ processEntities( const IndexType maxFaceSize = max( entitiesAlongX, entitiesAlongY ); const IndexType blocksPerFace = maxFaceSize / cudaBlockSize.x + ( maxFaceSize % cudaBlockSize.x != 0 ); IndexType cudaThreadsCount = 4 * cudaBlockSize.x * blocksPerFace; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, cudaThreadsCount ); //std::cerr << "blocksPerFace = " << blocksPerFace << "Threads count = " << cudaThreadsCount // << "cudaBlockCount = " << cudaBlocksCount.x << std::endl; dim3 gridIdx, cudaGridSize; - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); + 
Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX ); GridTraverser2DBoundary< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaGridSize, cudaBlockSize >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -512,20 +511,20 @@ processEntities( { dim3 cudaBlockSize( 16, 16 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, cudaGridSize; for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); - //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize ); + //Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount ); GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaGridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -553,104 +552,5 @@ processEntities( #endif } - -/**** - * 2D traverser, MIC - */ -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary, - int YOrthogonalBoundary, - typename... GridEntityParameters > -void -GridTraverser< Meshes::Grid< 2, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream, - const GridEntityParameters&... gridEntityParameters ) -{ - - -#ifdef HAVE_MIC - Devices::MIC::synchronizeDevice(); - - //TOHLE JE PRUSER -- nemim poslat vypustku -- - //GridEntity entity( gridPointer.template getData< Devices::MIC >(), begin, gridEntityParameters... ); - - - Devices::MICHider<const GridType> hMicGrid; - hMicGrid.pointer=& gridPointer.template getData< Devices::MIC >(); - Devices::MICHider<UserData> hMicUserData; - hMicUserData.pointer=& userDataPointer.template modifyData<Devices::MIC>(); - TNLMICSTRUCT(begin, const CoordinatesType); - TNLMICSTRUCT(end, const CoordinatesType); - - #pragma offload target(mic) in(sbegin,send,hMicUserData,hMicGrid) - { - - #pragma omp parallel firstprivate( sbegin, send ) - { - TNLMICSTRUCTUSE(begin, const CoordinatesType); - TNLMICSTRUCTUSE(end, const CoordinatesType); - GridEntity entity( *(hMicGrid.pointer), *(kernelbegin) ); - - if( processOnlyBoundaryEntities ) - { - if( YOrthogonalBoundary ) - #pragma omp for - for( auto k = kernelbegin->x(); - k <= kernelend->x(); - k ++ ) - { - entity.getCoordinates().x() = k; - entity.getCoordinates().y() = kernelbegin->y(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - entity.getCoordinates().y() = kernelend->y(); - 
entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - } - if( XOrthogonalBoundary ) - #pragma omp for - for( auto k = kernelbegin->y(); - k <= kernelend->y(); - k ++ ) - { - entity.getCoordinates().y() = k; - entity.getCoordinates().x() = kernelbegin->x(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - entity.getCoordinates().x() = kernelend->x(); - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - } - } - else - { - #pragma omp for - for( IndexType y = kernelbegin->y(); y <= kernelend->y(); y ++ ) - for( IndexType x = kernelbegin->x(); x <= kernelend->x(); x ++ ) - { - // std::cerr << x << " " <<y << std::endl; - entity.getCoordinates().x() = x; - entity.getCoordinates().y() = y; - entity.refresh(); - EntitiesProcessor::processEntity( entity.getMesh(), *(hMicUserData.pointer), entity ); - } - } - } - } - -#endif -} - } // namespace Meshes +} // namespace Meshes } // namespace TNL diff --git a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp index 5a3cd568f93bcb20f40682e55959eadf50b5c67f..a9aad8c9533dfecdc6e5410be51705d24438725c 100644 --- a/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp +++ b/src/TNL/Meshes/GridDetails/GridTraverser_3D.hpp @@ -10,10 +10,9 @@ #pragma once -#include <TNL/Devices/MIC.h> #include <TNL/Meshes/Grid.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/CudaStreamPool.h> +#include <TNL/Cuda/StreamPool.h> #include <TNL/Exceptions/CudaSupportMissing.h> #include <TNL/Meshes/GridDetails/GridTraverser.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -178,9 +177,9 @@ GridTraverser3D( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = begin.x() + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = begin.y() + 
Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); - coordinates.z() = begin.z() + Devices::Cuda::getGlobalThreadIdx_z( gridIdx ); + coordinates.x() = begin.x() + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = begin.y() + Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.z() = begin.z() + Cuda::getGlobalThreadIdx_z( gridIdx ); if( coordinates <= end ) { @@ -218,8 +217,8 @@ GridTraverser3DBoundaryAlongXY( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_y( gridIdx ); coordinates.z() = fixedZ; if( coordinates.x() <= endX && coordinates.y() <= endY ) @@ -255,9 +254,9 @@ GridTraverser3DBoundaryAlongXZ( typedef Meshes::Grid< 3, Real, Devices::Cuda, Index > GridType; typename GridType::CoordinatesType coordinates; - coordinates.x() = beginX + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.x() = beginX + Cuda::getGlobalThreadIdx_x( gridIdx ); coordinates.y() = fixedY; - coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates.x() <= endX && coordinates.z() <= endZ ) { @@ -293,8 +292,8 @@ GridTraverser3DBoundaryAlongYZ( typename GridType::CoordinatesType coordinates; coordinates.x() = fixedX; - coordinates.y() = beginY + Devices::Cuda::getGlobalThreadIdx_x( gridIdx ); - coordinates.z() = beginZ + Devices::Cuda::getGlobalThreadIdx_y( gridIdx ); + coordinates.y() = beginY + Cuda::getGlobalThreadIdx_x( gridIdx ); + coordinates.z() = beginZ + Cuda::getGlobalThreadIdx_y( gridIdx ); if( coordinates.y() <= endY && coordinates.z() <= endZ ) { @@ -342,13 +341,13 @@ processEntities( dim3 cudaBlocksCountAlongXY, 
cudaBlocksCountAlongXZ, cudaBlocksCountAlongYZ, cudaGridsCountAlongXY, cudaGridsCountAlongXZ, cudaGridsCountAlongYZ; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 ); - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXY, cudaGridsCountAlongXY, entitiesAlongX, entitiesAlongY ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, entitiesAlongX, entitiesAlongZ - 2 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, entitiesAlongY - 2, entitiesAlongZ - 2 ); + + auto& pool = Cuda::StreamPool::getInstance(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); - auto& pool = CudaStreamPool::getInstance(); - Devices::Cuda::synchronizeDevice(); - const cudaStream_t& s1 = pool.getStream( stream ); const cudaStream_t& s2 = pool.getStream( stream + 1 ); const cudaStream_t& s3 = pool.getStream( stream + 2 ); @@ -360,7 +359,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXY.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXY.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongXY, cudaGridsCountAlongXY, gridIdx, gridSize ); GridTraverser3DBoundaryAlongXY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaBlocksCountAlongXY, cudaBlockSize, 0 , s1 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -387,7 +386,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongXZ.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongXZ.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongXZ, cudaGridsCountAlongXZ, gridIdx, gridSize ); GridTraverser3DBoundaryAlongXZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< cudaBlocksCountAlongXZ, cudaBlockSize, 0, s3 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -414,7 +413,7 @@ processEntities( for( gridIdx.y = 0; gridIdx.y < cudaGridsCountAlongYZ.y; gridIdx.y++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongYZ.x; gridIdx.x++ ) { - Devices::Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCountAlongYZ, cudaGridsCountAlongYZ, gridIdx, gridSize ); GridTraverser3DBoundaryAlongYZ< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... 
> <<< cudaBlocksCountAlongYZ, cudaBlockSize, 0, s5 >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -451,21 +450,21 @@ processEntities( dim3 cudaBlockSize( 8, 8, 8 ); dim3 cudaBlocksCount, cudaGridsCount; - Devices::Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, - end.x() - begin.x() + 1, - end.y() - begin.y() + 1, - end.z() - begin.z() + 1 ); + Cuda::setupThreads( cudaBlockSize, cudaBlocksCount, cudaGridsCount, + end.x() - begin.x() + 1, + end.y() - begin.y() + 1, + end.z() - begin.z() + 1 ); - auto& pool = CudaStreamPool::getInstance(); + auto& pool = Cuda::StreamPool::getInstance(); const cudaStream_t& s = pool.getStream( stream ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); dim3 gridIdx, gridSize; for( gridIdx.z = 0; gridIdx.z < cudaGridsCount.z; gridIdx.z ++ ) for( gridIdx.y = 0; gridIdx.y < cudaGridsCount.y; gridIdx.y ++ ) for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ ) { - Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize ); + Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, gridSize ); GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > <<< gridSize, cudaBlockSize, 0, s >>> ( &gridPointer.template getData< Devices::Cuda >(), @@ -488,68 +487,5 @@ processEntities( #endif } -/**** - * 3D traverser, MIC - */ -template< typename Real, - typename Index > - template< - typename GridEntity, - typename EntitiesProcessor, - typename UserData, - bool processOnlyBoundaryEntities, - int XOrthogonalBoundary, - int YOrthogonalBoundary, - int ZOrthogonalBoundary, - typename... 
GridEntityParameters > -void -GridTraverser< Meshes::Grid< 3, Real, Devices::MIC, Index > >:: -processEntities( - const GridPointer& gridPointer, - const CoordinatesType& begin, - const CoordinatesType& end, - UserData& userData, - GridTraverserMode mode, - const int& stream, - const GridEntityParameters&... gridEntityParameters ) -{ - throw Exceptions::NotImplementedError("Not Implemented yet Grid Traverser <3, Real, Device::MIC>"); - -/* HAVE_CUDA - dim3 cudaBlockSize( 8, 8, 8 ); - dim3 cudaBlocks; - cudaBlocks.x = Devices::Cuda::getNumberOfBlocks( end.x() - begin.x() + 1, cudaBlockSize.x ); - cudaBlocks.y = Devices::Cuda::getNumberOfBlocks( end.y() - begin.y() + 1, cudaBlockSize.y ); - cudaBlocks.z = Devices::Cuda::getNumberOfBlocks( end.z() - begin.z() + 1, cudaBlockSize.z ); - const IndexType cudaXGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.x ); - const IndexType cudaYGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.y ); - const IndexType cudaZGrids = Devices::Cuda::getNumberOfGrids( cudaBlocks.z ); - - auto& pool = CudaStreamPool::getInstance(); - const cudaStream_t& s = pool.getStream( stream ); - - Devices::Cuda::synchronizeDevice(); - for( IndexType gridZIdx = 0; gridZIdx < cudaZGrids; gridZIdx ++ ) - for( IndexType gridYIdx = 0; gridYIdx < cudaYGrids; gridYIdx ++ ) - for( IndexType gridXIdx = 0; gridXIdx < cudaXGrids; gridXIdx ++ ) - GridTraverser3D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... > - <<< cudaBlocks, cudaBlockSize, 0, s >>> - ( &gridPointer.template getData< Devices::Cuda >(), - userData, - begin, - end, - gridXIdx, - gridYIdx, - gridZIdx, - gridEntityParameters... 
); - - // only launches into the stream 0 are synchronized - if( stream == 0 ) - { - cudaStreamSynchronize( s ); - TNL_CHECK_CUDA_DEVICE; - } - */ -} - } // namespace Meshes +} // namespace Meshes } // namespace TNL diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h index dd9562add377b02c3ab9ca91fa4804762182b46c..29fe8ffd67d0e7e0550abd6540391d24d5a0205d 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntitiesStorage.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Meshes/DimensionTag.h> #include <TNL/Meshes/GridEntityConfig.h> #include <TNL/Meshes/GridDetails/NeighborGridEntityGetter.h> diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h index 84a9c56d9389f31c013206c10f63fceb81ec2e0c..f7a3cc180fa3da51b8830f8001e8d616e621f891 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Assert.h> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Meshes/GridEntityConfig.h> namespace TNL { diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h index b8983feed92441a040ab8cab310afa36e8ef1b84..840a201c6021448e4f0de99552c4918364f92874 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter1D_impl.h @@ -14,7 +14,7 @@ #include <TNL/Meshes/GridDetails/Grid1D.h> #include <TNL/Meshes/GridDetails/Grid2D.h> #include <TNL/Meshes/GridDetails/Grid3D.h> -#include <TNL/TemplateStaticFor.h> +#include <TNL/Algorithms/TemplateStaticFor.h> namespace TNL { namespace Meshes { @@ -176,7 +176,7 @@ class 
NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix it -- does not work with nvcc - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h index 5b0e48767f6b530388fd0dd92612c12b3b0fcbb8..d6f4ab24eddf9b016060925dcd822469b9fcb741 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter2D_impl.h @@ -199,9 +199,9 @@ class NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix this to work with CUDA - TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h index 5fe5329bb3284ed578f502ad6828a84842fcc5eb..3cf2bb8d13e9121cfa4cd683282d78687bf4e446 100644 --- a/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h +++ b/src/TNL/Meshes/GridDetails/NeighborGridEntityGetter3D_impl.h @@ -14,7 +14,7 @@ 
#include <TNL/Meshes/GridDetails/Grid1D.h> #include <TNL/Meshes/GridDetails/Grid2D.h> #include <TNL/Meshes/GridDetails/Grid3D.h> -#include <TNL/TemplateStaticFor.h> +#include <TNL/Algorithms/TemplateStaticFor.h> namespace TNL { namespace Meshes { @@ -223,11 +223,11 @@ class NeighborGridEntityGetter< void refresh( const GridType& grid, const IndexType& entityIndex ) { #ifndef HAVE_CUDA // TODO: fix this to work with CUDA - TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); - TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilZRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilZRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, 0, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, 1, stencilSize + 1, StencilYRefresher >::exec( *this, entityIndex ); + Algorithms::TemplateStaticFor< IndexType, -stencilSize, stencilSize + 1, StencilXRefresher >::exec( *this, entityIndex ); #endif }; diff --git a/src/TNL/Meshes/Mesh.h b/src/TNL/Meshes/Mesh.h index 589a862b9228bfbc7b1108c3caa51af6303532c0..4d71e3ac91231e3741ee61bafe06e12ff99320a2 100644 --- a/src/TNL/Meshes/Mesh.h +++ b/src/TNL/Meshes/Mesh.h @@ -79,10 +79,6 @@ class Mesh using RealType = typename PointType::RealType; using GlobalIndexVector = Containers::Vector< GlobalIndexType, DeviceType, GlobalIndexType >; - // shortcuts, compatibility with grids - using HostType = Mesh< MeshConfig, Devices::Host >; - using 
CudaType = Mesh< MeshConfig, Devices::Cuda >; - template< int Dimension > using EntityTraits = typename MeshTraitsType::template EntityTraits< Dimension >; @@ -110,10 +106,6 @@ class Mesh using Face = EntityType< getMeshDimension() - 1 >; using Vertex = EntityType< 0 >; - static String getType(); - - virtual String getTypeVirtual() const; - static String getSerializationType(); virtual String getSerializationTypeVirtual() const; diff --git a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h index 7630a2d64920de41dbbc3f3f2e17603d2d3aabc1..c956d3169a659dbb106a2a04e83ce39984a84646 100644 --- a/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h +++ b/src/TNL/Meshes/MeshDetails/EntityStorageRebinder.h @@ -28,8 +28,8 @@ #include <TNL/Meshes/DimensionTag.h> #include <TNL/Meshes/Mesh.h> #include <TNL/Pointers/DevicePointer.h> -#include <TNL/ParallelFor.h> -#include <TNL/TemplateStaticFor.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/TemplateStaticFor.h> namespace TNL { namespace Meshes { @@ -69,10 +69,10 @@ public: subentity.template bindSuperentitiesStorageNetwork< SuperdimensionTag::value >( superentitiesStorage->getValues( i ) ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel, - &meshPointer.template modifyData< DeviceType >(), - &superentitiesStoragePointer.template modifyData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel, + &meshPointer.template modifyData< DeviceType >(), + &superentitiesStoragePointer.template modifyData< DeviceType >() ); } }; @@ -109,10 +109,10 @@ public: superentity.template bindSubentitiesStorageNetwork< DimensionTag::value >( subentitiesStorage->getValues( i ) ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel, - &meshPointer.template modifyData< DeviceType >(), - &subentitiesStoragePointer.template modifyData< DeviceType >() ); + 
Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel, + &meshPointer.template modifyData< DeviceType >(), + &subentitiesStoragePointer.template modifyData< DeviceType >() ); } }; @@ -144,14 +144,14 @@ public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, Inner >::execHost( mesh ); } }; public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, OuterLoop >::execHost( mesh ); } }; diff --git a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h index c98f145d0876b38c25ff7a77368de7b33f6f0656..bd9c02411283241b41a78fef4c008a71772f1ea6 100644 --- a/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h +++ b/src/TNL/Meshes/MeshDetails/IndexPermutationApplier.h @@ -156,27 +156,27 @@ public: }; Pointers::DevicePointer< Mesh > meshPointer( mesh ); - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel1, - &meshPointer.template getData< DeviceType >(), - entities.getData(), - perm.getData() ); - ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, - kernel2, - &meshPointer.template modifyData< DeviceType >(), - entities.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel1, + &meshPointer.template getData< DeviceType >(), + entities.getData(), + perm.getData() ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, entitiesCount, + kernel2, + &meshPointer.template modifyData< DeviceType >(), + entities.getData() ); // permute superentities storage - TemplateStaticFor< int, 0, Dimension, SubentitiesStorageWorker >::execHost( mesh, perm ); + Algorithms::TemplateStaticFor< int, 0, Dimension, 
SubentitiesStorageWorker >::execHost( mesh, perm ); // permute subentities storage - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesStorageWorker >::execHost( mesh, perm ); // update superentity indices from the subentities - TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm ); + Algorithms::TemplateStaticFor< int, 0, Dimension, SubentitiesWorker >::execHost( mesh, iperm ); // update subentity indices from the superentities - TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm ); + Algorithms::TemplateStaticFor< int, Dimension + 1, Mesh::getMeshDimension() + 1, SuperentitiesWorker >::execHost( mesh, iperm ); } }; diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h index 64485dc3c7ecb31ebdeb9891830244776550e315..110fa9eefc1ca498435ce2fd11d1d84df2ad4410 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntityIndex.h @@ -17,7 +17,7 @@ #pragma once #include <TNL/Assert.h> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> namespace TNL { namespace Meshes { diff --git a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h index e14e909a52d6824192e66e8d93d469a877ae70ca..80340c62cc0ff15104843b0ce7f3b1e6df7424e0 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntityReferenceOrientation.h @@ -26,8 +26,8 @@ namespace Meshes { template< typename MeshConfig, typename EntityTopology > class MeshEntityReferenceOrientation { - typedef typename MeshTraits< MeshConfig >::LocalIndexType LocalIndexType; - typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType; + 
typedef typename MeshTraits< MeshConfig >::LocalIndexType LocalIndexType; + typedef typename MeshTraits< MeshConfig >::GlobalIndexType GlobalIndexType; public: typedef EntitySeed< MeshConfig, EntityTopology > SeedType; @@ -45,8 +45,6 @@ class MeshEntityReferenceOrientation this->cornerIdsMap.insert( std::make_pair( referenceCornerIds[i], i ) ); } } - - static String getType(){ return "MeshEntityReferenceOrientation"; }; EntityOrientation createOrientation( const SeedType& seed ) const { @@ -67,4 +65,3 @@ class MeshEntityReferenceOrientation } // namespace Meshes } // namespace TNL - diff --git a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h index 8bdd40570ab492484a2784791e1061a6f6fed6b5..5c7414b422976804cf819f266a312ad0d65cf40a 100644 --- a/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h +++ b/src/TNL/Meshes/MeshDetails/MeshEntity_impl.h @@ -80,11 +80,11 @@ template< typename MeshConfig, typename EntityTopology > String MeshEntity< MeshConfig, Device, EntityTopology >:: -getType() +getSerializationType() { - return String( "MeshEntity< " ) + - MeshConfig::getType() + ", " + - EntityTopology::getType() + " >"; + return String( "MeshEntity<" ) + + TNL::getSerializationType< MeshConfig >() + ", " + + TNL::getSerializationType< EntityTopology >() + ">"; } template< typename MeshConfig, @@ -92,9 +92,9 @@ template< typename MeshConfig, typename EntityTopology > String MeshEntity< MeshConfig, Device, EntityTopology >:: -getTypeVirtual() const +getSerializationTypeVirtual() const { - return this->getType(); + return this->getSerializationType(); } template< typename MeshConfig, @@ -242,17 +242,19 @@ operator=( const MeshEntity< MeshConfig, Device_, Topologies::Vertex >& entity ) template< typename MeshConfig, typename Device > String MeshEntity< MeshConfig, Device, Topologies::Vertex >:: -getType() +getSerializationType() { - return String( "MeshEntity< ... 
>" ); + return String( "MeshEntity<" ) + + TNL::getSerializationType< MeshConfig >() + ", " + + TNL::getSerializationType< Topologies::Vertex >() + ">"; } template< typename MeshConfig, typename Device > String MeshEntity< MeshConfig, Device, Topologies::Vertex >:: -getTypeVirtual() const +getSerializationTypeVirtual() const { - return this->getType(); + return this->getSerializationType(); } template< typename MeshConfig, typename Device > diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h index 21ccd0ccd4c2b82979d4d636b7ec9a039fcd4175..f29fec33e8473afce3b90ccc12bcfd3c37cac7d9 100644 --- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h +++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Initializer.h @@ -10,8 +10,8 @@ #pragma once -#include <TNL/TemplateStaticFor.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/TemplateStaticFor.h> +#include <TNL/Algorithms/ParallelFor.h> #include <TNL/Pointers/DevicePointer.h> #include <TNL/Meshes/DimensionTag.h> #include <TNL/Meshes/MeshDetails/traits/MeshEntityTraits.h> @@ -121,8 +121,8 @@ public: public: static void exec( Mesh& mesh ) { - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh ); - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, ResetBoundaryTags >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, SetEntitiesCount >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, ResetBoundaryTags >::execHost( mesh ); auto kernel = [] __cuda_callable__ ( GlobalIndexType faceIndex, @@ -136,17 +136,17 @@ public: const GlobalIndexType cellIndex = face.template getSuperentityIndex< Mesh::getMeshDimension() >( 0 ); mesh->template setIsBoundaryEntity< Mesh::getMeshDimension() >( cellIndex, true ); // initialize all subentities - TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 
1, InitializeSubentities >::exec( *mesh, faceIndex, face ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() - 1, InitializeSubentities >::exec( *mesh, faceIndex, face ); } }; const GlobalIndexType facesCount = mesh.template getEntitiesCount< Mesh::getMeshDimension() - 1 >(); Pointers::DevicePointer< Mesh > meshPointer( mesh ); - ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount, - kernel, - &meshPointer.template modifyData< DeviceType >() ); + Algorithms::ParallelFor< DeviceType >::exec( (GlobalIndexType) 0, facesCount, + kernel, + &meshPointer.template modifyData< DeviceType >() ); - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, UpdateBoundaryIndices >::execHost( mesh ); } }; diff --git a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h index 873475bcab18a4bc1003843a527b2337d69328e3..e31c76dae0a4ebd5555cd0340c4d732e49451e02 100644 --- a/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h +++ b/src/TNL/Meshes/MeshDetails/MeshLayers/BoundaryTags/Layer.h @@ -116,8 +116,8 @@ public: } // TODO: parallelize directly on the device else { - using BoundaryTagsHostArray = typename BoundaryTagsArray::HostType; - using OrderingHostArray = typename OrderingArray::HostType; + using BoundaryTagsHostArray = typename BoundaryTagsArray::template Self< typename BoundaryTagsArray::ValueType, Devices::Host >; + using OrderingHostArray = typename OrderingArray::template Self< typename OrderingArray::ValueType, Devices::Host >; BoundaryTagsHostArray hostBoundaryTags; OrderingHostArray hostBoundaryIndices; diff --git a/src/TNL/Meshes/MeshDetails/Mesh_impl.h b/src/TNL/Meshes/MeshDetails/Mesh_impl.h index 4fbd3ba07a01b79d054720e39a3abf43327f493c..4b0488b2d05f1cb9a36cd4ee157f0b355d679d63 100644 --- a/src/TNL/Meshes/MeshDetails/Mesh_impl.h +++ 
b/src/TNL/Meshes/MeshDetails/Mesh_impl.h @@ -91,28 +91,12 @@ getMeshDimension() return MeshTraitsType::meshDimension; } -template< typename MeshConfig, typename Device > -String -Mesh< MeshConfig, Device >:: -getType() -{ - return String( "Meshes::Mesh< ") + MeshConfig::getType() + " >"; -} - -template< typename MeshConfig, typename Device > -String -Mesh< MeshConfig, Device >:: -getTypeVirtual() const -{ - return this->getType(); -} - template< typename MeshConfig, typename Device > String Mesh< MeshConfig, Device >:: getSerializationType() { - return Mesh::getType(); + return String( "Meshes::Mesh< ") + TNL::getType< MeshConfig >() + " >"; } template< typename MeshConfig, typename Device > diff --git a/src/TNL/Meshes/MeshDetails/Traverser_impl.h b/src/TNL/Meshes/MeshDetails/Traverser_impl.h index 5dedf58fd2ceea521e51ece53e42d0efd65caec1..2ce07addfff74fc55fd26981cc97f8704341aaf2 100644 --- a/src/TNL/Meshes/MeshDetails/Traverser_impl.h +++ b/src/TNL/Meshes/MeshDetails/Traverser_impl.h @@ -11,8 +11,7 @@ #pragma once #include <TNL/Meshes/Traverser.h> - -#include <TNL/Exceptions/CudaSupportMissing.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Meshes { @@ -20,221 +19,87 @@ namespace Meshes { template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh, MeshEntity, EntitiesDimension >:: processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { - auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) { - const auto entityIndex = meshPointer->template getBoundaryEntityIndex< EntitiesDimension >( i ); - auto& entity = meshPointer->template getEntity< 
EntitiesDimension >( entityIndex ); + const GlobalIndexType entitiesCount = meshPointer->template getBoundaryEntitiesCount< MeshEntity::getEntityDimension() >(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType i, + const Mesh* mesh, + UserData userData ) + { + const GlobalIndexType entityIndex = mesh->template getBoundaryEntityIndex< MeshEntity::getEntityDimension() >( i ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, entity ); - } + EntitiesProcessor::processEntity( *mesh, userData, entity ); + }; + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh, MeshEntity, EntitiesDimension >:: processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { - auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) i = 0; i < entitiesCount; i++ ) { - const auto entityIndex = meshPointer->template getInteriorEntityIndex< EntitiesDimension >( i ); - auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex ); + const auto entitiesCount = meshPointer->template getInteriorEntitiesCount< MeshEntity::getEntityDimension() >(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType i, + const Mesh* mesh, + UserData userData ) + { + const GlobalIndexType entityIndex = 
mesh->template getInteriorEntityIndex< MeshEntity::getEntityDimension() >( i ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, entity ); - } + EntitiesProcessor::processEntity( *mesh, userData, entity ); + }; + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } template< typename Mesh, typename MeshEntity, int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void Traverser< Mesh, MeshEntity, EntitiesDimension >:: processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const + UserData userData ) const { - auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( decltype(entitiesCount) entityIndex = 0; entityIndex < entitiesCount; entityIndex++ ) { - auto& entity = meshPointer->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *meshPointer, userData, entity ); - } -} - - -#ifdef HAVE_CUDA -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserBoundaryEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x; - i < entitiesCount; - i += blockDim.x * gridDim.x ) + const auto entitiesCount = meshPointer->template getEntitiesCount< MeshEntity::getEntityDimension() 
>(); + auto kernel = [] __cuda_callable__ + ( const GlobalIndexType entityIndex, + const Mesh* mesh, + UserData userData ) { - const auto entityIndex = mesh->template getBoundaryEntityIndex< EntitiesDimension >( i ); - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); + auto& entity = mesh->template getEntity< MeshEntity::getEntityDimension() >( entityIndex ); // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} - -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserInteriorEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType i = blockIdx.x * blockDim.x + threadIdx.x; - i < entitiesCount; - i += blockDim.x * gridDim.x ) - { - const auto entityIndex = mesh->template getInteriorEntityIndex< EntitiesDimension >( i ); - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} - -template< int EntitiesDimension, - typename EntitiesProcessor, - typename Mesh, - typename UserData > -__global__ void -MeshTraverserAllEntitiesKernel( const Mesh* mesh, - UserData userData, - typename Mesh::GlobalIndexType entitiesCount ) -{ - for( typename Mesh::GlobalIndexType entityIndex = blockIdx.x * blockDim.x + threadIdx.x; - entityIndex < entitiesCount; - entityIndex += blockDim.x * gridDim.x ) - { - auto& entity = mesh->template getEntity< EntitiesDimension >( entityIndex ); - // TODO: if the Mesh::IdType is void, then we should also pass the entityIndex - EntitiesProcessor::processEntity( *mesh, userData, entity ); - } -} -#endif - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< 
typename UserData, - typename EntitiesProcessor > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getBoundaryEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Devices::Cuda::synchronizeDevice(); - MeshTraverserBoundaryEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif -} - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getInteriorEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Devices::Cuda::synchronizeDevice(); - MeshTraverserInteriorEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif 
-} - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > - template< typename UserData, - typename EntitiesProcessor > -void -Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension >:: -processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const -{ -#ifdef HAVE_CUDA - auto entitiesCount = meshPointer->template getEntitiesCount< EntitiesDimension >(); - - dim3 blockSize( 256 ); - dim3 gridSize; - const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); - gridSize.x = min( desGridSize, Devices::Cuda::getNumberOfBlocks( entitiesCount, blockSize.x ) ); - - Devices::Cuda::synchronizeDevice(); - MeshTraverserAllEntitiesKernel< EntitiesDimension, EntitiesProcessor > - <<< gridSize, blockSize >>> - ( &meshPointer.template getData< Devices::Cuda >(), - userData, - entitiesCount ); - cudaDeviceSynchronize(); - TNL_CHECK_CUDA_DEVICE; -#else - throw Exceptions::CudaSupportMissing(); -#endif + }; + Pointers::synchronizeSmartPointersOnDevice< DeviceType >(); + Algorithms::ParallelFor< DeviceType >::exec( + (GlobalIndexType) 0, entitiesCount, + kernel, + &meshPointer.template getData< DeviceType >(), + userData ); } } // namespace Meshes diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h index 7058691a301d569fdb8e230ba2889f4929e409c9..ada83b5fb0cf3a3af6d67e5559746d4743c5f71d 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h +++ b/src/TNL/Meshes/MeshDetails/initializer/EntityInitializer.h @@ -16,8 +16,6 @@ #pragma once -#include <TNL/TemplateStaticFor.h> - #include <TNL/Meshes/MeshDetails/initializer/EntitySeed.h> #include <TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h> #include <TNL/Meshes/MeshDetails/initializer/SuperentityStorageInitializer.h> @@ -68,8 +66,6 @@ class EntityInitializer using InitializerType = Initializer< 
MeshConfig >; public: - static String getType() { return "EntityInitializer"; }; - static void initEntity( EntityType& entity, const GlobalIndexType& entityIndex, const SeedType& entitySeed, InitializerType& initializer) { initializer.setEntityIndex( entity, entityIndex ); @@ -91,8 +87,6 @@ public: using PointType = typename MeshTraits< MeshConfig >::PointType; using InitializerType = Initializer< MeshConfig >; - static String getType() { return "EntityInitializer"; }; - static void initEntity( VertexType& entity, const GlobalIndexType& entityIndex, const PointType& point, InitializerType& initializer) { initializer.setEntityIndex( entity, entityIndex ); diff --git a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h index b9d34d070114bf40f71d79df1a0d266f545d7347..41439c4056905523596b9bbbf76df035f14ca4cd 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h +++ b/src/TNL/Meshes/MeshDetails/initializer/EntitySeed.h @@ -40,8 +40,6 @@ class EntitySeed using HashType = EntitySeedHash< EntitySeed >; using KeyEqual = EntitySeedEq< EntitySeed >; - static String getType() { return String( "EntitySeed<>" ); } - static constexpr LocalIndexType getCornersCount() { return SubvertexTraits::count; @@ -82,8 +80,6 @@ class EntitySeed< MeshConfig, Topologies::Vertex > using HashType = EntitySeedHash< EntitySeed >; using KeyEqual = EntitySeedEq< EntitySeed >; - static String getType() { return String( "EntitySeed<>" ); } - static constexpr LocalIndexType getCornersCount() { return 1; diff --git a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h index 8ad08bd63d3b9a6a1be186ff73baa34370da2d28..30cbb31e6a960b2cd00e8faafe5262b7ef17e472 100644 --- a/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h +++ b/src/TNL/Meshes/MeshDetails/initializer/SubentitySeedsCreator.h @@ -16,7 +16,7 @@ #pragma once -#include <TNL/TemplateStaticFor.h> 
+#include <TNL/Algorithms/TemplateStaticFor.h> #include <TNL/Meshes/MeshDetails/traits/MeshTraits.h> namespace TNL { @@ -47,7 +47,7 @@ public: static SubentitySeedArray create( const SubvertexAccessorType& subvertices ) { SubentitySeedArray subentitySeeds; - TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices ); + Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITIES_COUNT, CreateSubentitySeeds >::execHost( subentitySeeds, subvertices ); return subentitySeeds; } @@ -61,7 +61,7 @@ private: public: static void exec( SubentitySeedArray& subentitySeeds, const SubvertexAccessorType& subvertices ) { - TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices ); + Algorithms::TemplateStaticFor< LocalIndexType, 0, SUBENTITY_VERTICES_COUNT, SetSubentitySeedVertex >::execHost( subentitySeeds[ subentityIndex ], subvertices ); } private: diff --git a/src/TNL/Meshes/MeshEntity.h b/src/TNL/Meshes/MeshEntity.h index 6e0970ade32564f03aa16380f86164c109a79cb7..b1c8afea57430ae06a7b59c6d9aba6c495b26017 100644 --- a/src/TNL/Meshes/MeshEntity.h +++ b/src/TNL/Meshes/MeshEntity.h @@ -72,9 +72,9 @@ class MeshEntity MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity ); - static String getType(); + static String getSerializationType(); - String getTypeVirtual() const; + String getSerializationTypeVirtual() const; void save( File& file ) const; @@ -166,9 +166,9 @@ class MeshEntity< MeshConfig, Device, Topologies::Vertex > MeshEntity& operator=( const MeshEntity< MeshConfig, Device_, EntityTopology >& entity ); - static String getType(); + static String getSerializationType(); - String getTypeVirtual() const; + String getSerializationTypeVirtual() const; void save( File& file ) const; diff --git a/src/TNL/Meshes/Topologies/Edge.h b/src/TNL/Meshes/Topologies/Edge.h index 
4c4b8269e51a7de9f709f28868b6fc09d517f757..298e638c0badc54f50921e058a348a4f829557bb 100644 --- a/src/TNL/Meshes/Topologies/Edge.h +++ b/src/TNL/Meshes/Topologies/Edge.h @@ -22,15 +22,10 @@ namespace TNL { namespace Meshes { namespace Topologies { - + struct Edge { static constexpr int dimension = 1; - - static String getType() - { - return "Topologies::Edge"; - } }; diff --git a/src/TNL/Meshes/Topologies/Hexahedron.h b/src/TNL/Meshes/Topologies/Hexahedron.h index db922f3b5bc487a7160e74c471597826e1461584..af0765db510d569fb264f5845074298d5ca69809 100644 --- a/src/TNL/Meshes/Topologies/Hexahedron.h +++ b/src/TNL/Meshes/Topologies/Hexahedron.h @@ -25,11 +25,6 @@ namespace Topologies { struct Hexahedron { static constexpr int dimension = 3; - - static String getType() - { - return "Topologies::Hexahedron"; - } }; template<> diff --git a/src/TNL/Meshes/Topologies/Quadrilateral.h b/src/TNL/Meshes/Topologies/Quadrilateral.h index 6b5d4eb54b68f01f6fef9932cd66c3c897d1d72d..50be274e2e4d9671c79cf974ac4ac509893f2fb0 100644 --- a/src/TNL/Meshes/Topologies/Quadrilateral.h +++ b/src/TNL/Meshes/Topologies/Quadrilateral.h @@ -25,11 +25,6 @@ namespace Topologies { struct Quadrilateral { static constexpr int dimension = 2; - - static String getType() - { - return "Topologies::Quadrilateral"; - } }; diff --git a/src/TNL/Meshes/Topologies/Simplex.h b/src/TNL/Meshes/Topologies/Simplex.h index a9cbee72ba890f971611066c4908a0ffb0f55f29..3b61f09fdec62d0f9613f8c56db5cc22e1080180 100644 --- a/src/TNL/Meshes/Topologies/Simplex.h +++ b/src/TNL/Meshes/Topologies/Simplex.h @@ -25,15 +25,9 @@ namespace Meshes { namespace Topologies { template< int dimension_ > -class Simplex +struct Simplex { - public: - static constexpr int dimension = dimension_; - - static String getType() - { - return String( "Topologies::Simplex< " ) + convertToString( dimension ) + " >"; - } + static constexpr int dimension = dimension_; }; namespace SimplexDetails { diff --git a/src/TNL/Meshes/Topologies/Tetrahedron.h 
b/src/TNL/Meshes/Topologies/Tetrahedron.h index 7722f5ef688ad41cd3b594e8ec3a5484aae17b9e..048daa1c3c3fe7cc112489e2d40411c4f4ad47b2 100644 --- a/src/TNL/Meshes/Topologies/Tetrahedron.h +++ b/src/TNL/Meshes/Topologies/Tetrahedron.h @@ -25,11 +25,6 @@ namespace Topologies { struct Tetrahedron { static constexpr int dimension = 3; - - static String getType() - { - return "Topologies::Tetrahedron"; - } }; template<> diff --git a/src/TNL/Meshes/Topologies/Triangle.h b/src/TNL/Meshes/Topologies/Triangle.h index 11d1c8a846c360f22d46fb676f411146d33662bd..efe031059d1fa5e7705dc131d69de4a862743ed7 100644 --- a/src/TNL/Meshes/Topologies/Triangle.h +++ b/src/TNL/Meshes/Topologies/Triangle.h @@ -25,11 +25,6 @@ namespace Topologies { struct Triangle { static constexpr int dimension = 2; - - static String getType() - { - return "Topologies::Triangle"; - } }; diff --git a/src/TNL/Meshes/Topologies/Vertex.h b/src/TNL/Meshes/Topologies/Vertex.h index cff78e37d6e64f3798791e26510864aaed44955c..f90127624806c4acbfb8e0ee2aa01b5988f0c3f4 100644 --- a/src/TNL/Meshes/Topologies/Vertex.h +++ b/src/TNL/Meshes/Topologies/Vertex.h @@ -25,11 +25,6 @@ namespace Topologies { struct Vertex { static constexpr int dimension = 0; - - static String getType() - { - return "Topologies::Vertex"; - } }; } // namespace Topologies diff --git a/src/TNL/Meshes/Traverser.h b/src/TNL/Meshes/Traverser.h index 017084ae8d39f03927b5a038a62776abe7a1f588..f157e3afcd56fb6ffebf95865d7546490fb00668 100644 --- a/src/TNL/Meshes/Traverser.h +++ b/src/TNL/Meshes/Traverser.h @@ -18,54 +18,30 @@ namespace Meshes { template< typename Mesh, typename MeshEntity, + // extra parameter which is used only for specializations implementing grid traversers int EntitiesDimension = MeshEntity::getEntityDimension() > class Traverser { public: using MeshType = Mesh; - using MeshPointer = Pointers::SharedPointer< MeshType >; + using MeshPointer = Pointers::SharedPointer< MeshType >; using DeviceType = typename MeshType::DeviceType; + using 
GlobalIndexType = typename MeshType::GlobalIndexType; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; - template< typename UserData, - typename EntitiesProcessor > + template< typename EntitiesProcessor, + typename UserData > void processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const; -}; - -template< typename MeshConfig, - typename MeshEntity, - int EntitiesDimension > -class Traverser< Mesh< MeshConfig, Devices::Cuda >, MeshEntity, EntitiesDimension > -{ - public: - using MeshType = Mesh< MeshConfig, Devices::Cuda >; - using MeshPointer = Pointers::SharedPointer< MeshType >; - using DeviceType = typename MeshType::DeviceType; - - template< typename UserData, - typename EntitiesProcessor > - void processBoundaryEntities( const MeshPointer& meshPointer, - UserData& userData ) const; - - template< typename UserData, - typename EntitiesProcessor > - void processInteriorEntities( const MeshPointer& meshPointer, - UserData& userData ) const; - - template< typename UserData, - typename EntitiesProcessor > - void processAllEntities( const MeshPointer& meshPointer, - UserData& userData ) const; + UserData userData ) const; }; } // namespace Meshes diff --git a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h index 46c4e9c58f30f3d7abf9c04de6071d64f5e34977..b92148fa90ddf5c0d8a46e90aa817d9b8531ffd2 100644 --- a/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h +++ b/src/TNL/Meshes/TypeResolver/MeshTypeResolver_impl.h @@ -74,7 +74,7 @@ MeshTypeResolver< 
Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs... resolveWorldDimension( const Reader& reader, ProblemSetterArgs&&... problemSetterArgs ) { - std::cerr << "The cell topology " << CellTopology::getType() << " is disabled in the build configuration." << std::endl; + std::cerr << "The cell topology " << getType< CellTopology >() << " is disabled in the build configuration." << std::endl; return false; } @@ -334,9 +334,9 @@ MeshTypeResolver< Reader, ConfigTag, Device, ProblemSetter, ProblemSetterArgs... resolveTerminate( const Reader& reader, ProblemSetterArgs&&... problemSetterArgs ) { - std::cerr << "The mesh config type " << TNL::getType< MeshConfig >() << " is disabled in the build configuration for device " << Device::getDeviceType() << "." << std::endl; + std::cerr << "The mesh config type " << getType< MeshConfig >() << " is disabled in the build configuration for device " << getType< Device >() << "." << std::endl; return false; -}; +} template< typename Reader, typename ConfigTag, @@ -352,7 +352,7 @@ resolveTerminate( const Reader& reader, { using MeshType = Meshes::Mesh< MeshConfig, Device >; return ProblemSetter< MeshType >::run( std::forward<ProblemSetterArgs>(problemSetterArgs)... ); -}; +} } // namespace Meshes } // namespace TNL diff --git a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h index 6c60126457d1d4aa33426631b836407be6a412d7..d45016af193656cb6094f3782b304a92c224bf2a 100644 --- a/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h +++ b/src/TNL/Meshes/TypeResolver/TypeResolver_impl.h @@ -149,7 +149,7 @@ loadMesh( const String& fileName, std::cerr << "I am not able to load the mesh from the file " << fileName << ". " "Perhaps the mesh stored in the file is not supported by the mesh " "passed to the loadMesh function? 
The mesh type is " - << mesh.getType() << std::endl; + << getType< decltype(mesh) >() << std::endl; return false; } return true; diff --git a/src/TNL/Meshes/Writers/VTKWriter_impl.h b/src/TNL/Meshes/Writers/VTKWriter_impl.h index e6c3eca442aed486ade87326480f0e7070b04b1c..83cf95ec4ca18f9c1b1375b9e89197884cdd8a5e 100644 --- a/src/TNL/Meshes/Writers/VTKWriter_impl.h +++ b/src/TNL/Meshes/Writers/VTKWriter_impl.h @@ -407,10 +407,10 @@ VTKWriter< Mesh >::writeAllEntities( const Mesh& mesh, std::ostream& str ) const Index cellsListSize = __impl::getCellsListSize( mesh ); str << std::endl << "CELLS " << allEntitiesCount << " " << cellsListSize << std::endl; - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntitiesWriter >::exec( mesh, str ); str << std::endl << "CELL_TYPES " << allEntitiesCount << std::endl; - TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str ); + Algorithms::TemplateStaticFor< int, 0, Mesh::getMeshDimension() + 1, EntityTypesWriter >::exec( mesh, str ); } template< typename Mesh > diff --git a/src/TNL/Object.h b/src/TNL/Object.h index 24ced9a5c2ee9ac97f8d2a7a86e51e88085d13a4..ff7432635a13379c83bf49af758f1da7dd8f6b28 100644 --- a/src/TNL/Object.h +++ b/src/TNL/Object.h @@ -12,7 +12,6 @@ #include <vector> -#include <TNL/Devices/CudaCallable.h> #include <TNL/String.h> #include <TNL/File.h> @@ -25,8 +24,7 @@ namespace TNL { * \brief Basic class for majority of TNL objects like matrices, meshes, grids, solvers, etc.. * * Objects like numerical meshes, matrices large vectors etc. are inherited by - * this class. This class introduces virtual method \ref getType which is - * supposed to tell the object type in a C++ style. + * this class. * * Since the virtual destructor is not defined as \ref __cuda_callable__, * objects inherited from Object should not be created in CUDA kernels. 
@@ -42,28 +40,6 @@ class Object { public: - /** - * \brief Static type getter. - * - * Returns the type in C++ style - for example the returned value - * may look as \c "Array< double, Devices::Cuda, int >". - * - * \par Example - * \include ObjectExample_getType.cpp - * \par Output - * \include ObjectExample_getType.out - */ - static String getType(); - - /*** - * \brief Virtual type getter. - * - * Returns the type in C++ style - for example the returned value - * may look as \c "Array< double, Devices::Cuda, int >". - * See example at \ref Object::getType. - */ - virtual String getTypeVirtual() const; - /** * \brief Static serialization type getter. * @@ -71,7 +47,6 @@ class Object * is supposed to return the object type but with the device type replaced * by Devices::Host. For example \c Array< double, Devices::Cuda > is * saved as \c Array< double, Devices::Host >. - * See example at \ref Object::getType. */ static String getSerializationType(); @@ -82,7 +57,6 @@ class Object * is supposed to return the object type but with the device type replaced * by Devices::Host. For example \c Array< double, Devices::Cuda > is * saved as \c Array< double, Devices::Host >. - * See example at \ref Object::getType. */ virtual String getSerializationTypeVirtual() const; @@ -128,9 +102,7 @@ class Object * Since it is not defined as \ref __cuda_callable__, objects inherited * from Object should not be created in CUDA kernels. 
*/ -#ifndef HAVE_MIC virtual ~Object(){}; -#endif }; /** diff --git a/src/TNL/Object.hpp b/src/TNL/Object.hpp index eeec8bf9840b2da7a72db00c49fd64ea660fcc6d..25c709212016025b6bdf18953f60380cb982edfc 100644 --- a/src/TNL/Object.hpp +++ b/src/TNL/Object.hpp @@ -20,16 +20,6 @@ namespace TNL { static constexpr char magic_number[] = "TNLMN"; -inline String Object::getType() -{ - return String( "Object" ); -} - -inline String Object::getTypeVirtual() const -{ - return this->getType(); -} - inline String Object::getSerializationType() { return String( "Object" ); diff --git a/src/TNL/Operators/Advection/LaxFridrichs.h b/src/TNL/Operators/Advection/LaxFridrichs.h index d1fbd399e52737404063f6c016b19ac3743c8587..45a8abae727dc67bba817cadfc6e0deec7a2d853 100644 --- a/src/TNL/Operators/Advection/LaxFridrichs.h +++ b/src/TNL/Operators/Advection/LaxFridrichs.h @@ -61,8 +61,6 @@ class LaxFridrichs< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -153,8 +151,6 @@ class LaxFridrichs< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -251,8 +247,6 @@ class LaxFridrichs< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; diff --git a/src/TNL/Operators/Advection/Upwind.h b/src/TNL/Operators/Advection/Upwind.h index e41768e571082b9e7be7b547d915b4bf1e91340f..942ec29566235b0a7f5a7476f06e0e72e5356519 100644 --- a/src/TNL/Operators/Advection/Upwind.h +++ b/src/TNL/Operators/Advection/Upwind.h @@ -61,8 +61,6 @@ class Upwind< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, Veloc return true; } - static String 
getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -154,8 +152,6 @@ class Upwind< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, Veloc return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; @@ -256,8 +252,6 @@ class Upwind< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, Veloc return true; } - static String getType(); - void setViscosity(const Real& artificalViscosity) { this->artificialViscosity = artificalViscosity; diff --git a/src/TNL/Operators/ExactFunctionInverseOperator.h b/src/TNL/Operators/ExactFunctionInverseOperator.h index 7833d05445fd6c49f0971a5c57c0d7c4cdcea37c..6fe0b2f127c8e3c86f3d61a2e1605ef5ea1a1d90 100644 --- a/src/TNL/Operators/ExactFunctionInverseOperator.h +++ b/src/TNL/Operators/ExactFunctionInverseOperator.h @@ -25,12 +25,6 @@ class ExactFunctionInverseOperator { public: - static String getType() - { - return String( "ExactFunctionInverseOperator< " ) + - String( Dimension) + " >"; - } - InnerOperator& getInnerOperator() { return this->innerOperator; diff --git a/src/TNL/Operators/ExactIdentityOperator.h b/src/TNL/Operators/ExactIdentityOperator.h index 7c39938df87fb6a1863d82ea13d54e2e4e482c1a..22d7bbdd86c81bb4ce41eb3aa0b0058c95082b1a 100644 --- a/src/TNL/Operators/ExactIdentityOperator.h +++ b/src/TNL/Operators/ExactIdentityOperator.h @@ -23,12 +23,6 @@ class ExactIdentityOperator { public: - static String getType() - { - return String( "ExactIdentityOperator< " ) + - String( Dimension) + " >"; - } - template< typename Function > __cuda_callable__ typename Function::RealType diff --git a/src/TNL/Operators/FunctionInverseOperator.h b/src/TNL/Operators/FunctionInverseOperator.h index 1265f3b70bdc7b5c5264289e1a39b71aa0801533..5bfb5c37f5eb0ef9a2f407c0cf6c81333a227075 100644 --- a/src/TNL/Operators/FunctionInverseOperator.h +++ 
b/src/TNL/Operators/FunctionInverseOperator.h @@ -36,11 +36,6 @@ class FunctionInverseOperator FunctionInverseOperator( const OperatorType& operator_ ) : operator_( operator_ ) {}; - static String getType() - { - return String( "FunctionInverseOperator< " ) + OperatorType::getType() + " >"; - } - const OperatorType& getOperator() const { return this->operator_; } template< typename MeshFunction, diff --git a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h index 790fa0777996839904b2c8270720dde20afc8946..f0a927d3465aa9168acd793eb43158264283489d 100644 --- a/src/TNL/Operators/diffusion/ExactLinearDiffusion.h +++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion.h @@ -32,8 +32,6 @@ class ExactLinearDiffusion< 1 > : public Functions::Domain< 1, Functions::SpaceD static const int Dimension = 1; - static String getType(); - template< typename Function > __cuda_callable__ inline typename Function::RealType operator()( const Function& function, @@ -47,8 +45,6 @@ class ExactLinearDiffusion< 2 > : public Functions::Domain< 2, Functions::SpaceD public: static const int Dimension = 2; - - static String getType(); template< typename Function > __cuda_callable__ inline @@ -63,8 +59,6 @@ class ExactLinearDiffusion< 3 > : public Functions::Domain< 3 > public: static const int Dimension = 3; - - static String getType(); template< typename Function > __cuda_callable__ inline diff --git a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h index 0aabb1027e38d3390ca009813e7ac1bf54cb006d..60a27d9c07799c180124e87faad6c2d3fb4df4de 100644 --- a/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/ExactLinearDiffusion_impl.h @@ -19,13 +19,6 @@ namespace TNL { namespace Operators { -String -ExactLinearDiffusion< 1 >:: -getType() -{ - return "ExactLinearDiffusion< 1 >"; -} - template< typename Function > __cuda_callable__ inline typename 
Function::RealType @@ -37,13 +30,6 @@ operator()( const Function& function, return function.template getPartialDerivative< 2, 0, 0 >( v, time ); } -String -ExactLinearDiffusion< 2 >:: -getType() -{ - return "ExactLinearDiffusion< 2 >"; -} - template< typename Function > __cuda_callable__ inline typename Function::RealType @@ -56,13 +42,6 @@ operator()( const Function& function, function.template getPartialDerivative< 0, 2, 0 >( v, time ); } -String -ExactLinearDiffusion< 3 >:: -getType() -{ - return "ExactLinearDiffusion< 3 >"; -} - template< typename Function > __cuda_callable__ inline typename Function::RealType diff --git a/src/TNL/Operators/diffusion/ExactMeanCurvature.h b/src/TNL/Operators/diffusion/ExactMeanCurvature.h index fbc2260efad49c99337d7af994bc3c61ef89c90e..a96d5a090acb0a1798409a665c1f98c001dbbc5b 100644 --- a/src/TNL/Operators/diffusion/ExactMeanCurvature.h +++ b/src/TNL/Operators/diffusion/ExactMeanCurvature.h @@ -34,13 +34,6 @@ class ExactMeanCurvature typedef ExactFunctionInverseOperator< Dimension, ExactGradientNormType > FunctionInverse; typedef ExactNonlinearDiffusion< Dimension, FunctionInverse > NonlinearDiffusion; - static String getType() - { - return String( "ExactMeanCurvature< " ) + - String( Dimension) + ", " + - InnerOperator::getType() + " >"; - } - template< typename Real > void setRegularizationEpsilon( const Real& eps) { diff --git a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h index 25381e2bb48bcdbdbde8f419c1cac856f621d7fd..826796751fd72328a7bd5213faa2c631842cf8bd 100644 --- a/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h +++ b/src/TNL/Operators/diffusion/ExactNonlinearDiffusion.h @@ -37,11 +37,6 @@ class ExactNonlinearDiffusion< 1, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< 1, " + Nonlinearity::getType() + " >"; - }; - Nonlinearity& getNonlinearity() { return this->nonlinearity; @@ -91,11 +86,6 
@@ class ExactNonlinearDiffusion< 2, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 2 >"; - }; - Nonlinearity& getNonlinearity() { return this->nonlinearity; @@ -150,11 +140,6 @@ class ExactNonlinearDiffusion< 3, Nonlinearity, InnerOperator > { public: - static String getType() - { - return "ExactNonlinearDiffusion< " + Nonlinearity::getType() + ", 3 >"; - } - Nonlinearity& getNonlinearity() { return this->nonlinearity; diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h index efb17555547b53da1b850b789b6956674c1abbdf..e98d21c11d3975aa3306df35e9812e7a1e3420f3 100644 --- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h +++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator.h @@ -49,8 +49,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 1,MeshReal, Device, MeshIndex typedef Index IndexType; typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ @@ -102,8 +100,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshInde typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ @@ -155,8 +151,6 @@ class FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshInde typedef Index IndexType; typedef OperatorQ OperatorQType; - static String getType(); - template< typename MeshEntity, typename Vector > __cuda_callable__ diff --git a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h index 083160467875cc0e4f40b15c63b7cf59c222a68b..3d496bd52ea4f144122014422dabd46cca678801 100644 --- a/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h +++ b/src/TNL/Operators/diffusion/FiniteVolumeNonlinearOperator_impl.h 
@@ -21,23 +21,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -98,23 +81,6 @@ setMatrixElements( const RealType& time, typename Matrix::MatrixRow matrixRow = matrix.getRow( index ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -201,23 +167,6 @@ setMatrixElements( const RealType& time, matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(), eCoef ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename OperatorQ > -String -FiniteVolumeNonlinearOperator< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, OperatorQ, Real, Index >:: -getType() -{ - return String( "FiniteVolumeNonlinearOperator< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", " + - OperatorQ::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/LinearDiffusion.h b/src/TNL/Operators/diffusion/LinearDiffusion.h index 
e31113800f1c790ee630e7a99ae8d046fe039a5c..33e493d02c5e80bdd0d9ec6fdb40a519c4082f81 100644 --- a/src/TNL/Operators/diffusion/LinearDiffusion.h +++ b/src/TNL/Operators/diffusion/LinearDiffusion.h @@ -55,8 +55,6 @@ class LinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, Inde static const int Dimension = MeshType::getMeshDimension(); static constexpr int getMeshDimension() { return Dimension; } - - static String getType(); template< typename PreimageFunction, typename MeshEntity > @@ -107,8 +105,6 @@ class LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Ind static constexpr int getMeshDimension() { return Dimension; } - static String getType(); - template< typename PreimageFunction, typename EntityType > __cuda_callable__ inline Real operator()( const PreimageFunction& u, @@ -157,8 +153,6 @@ class LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Ind static constexpr int getMeshDimension() { return Dimension; } - static String getType(); - template< typename PreimageFunction, typename EntityType > __cuda_callable__ diff --git a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h index 83a20829ccc4f46a56eb80b1e474990db23856da..51bdf8a62372f82acf85b941f9de580b6d69c6a2 100644 --- a/src/TNL/Operators/diffusion/LinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/LinearDiffusion_impl.h @@ -22,21 +22,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -109,21 +94,6 @@ setMatrixElements( const PreimageFunction& u, matrixRow.setElement( 2, 
neighborEntities.template getEntityIndex< 1 >(), - lambdaX ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -203,22 +173,6 @@ setMatrixElements( const PreimageFunction& u, matrixRow.setElement( 4, neighborEntities.template getEntityIndex< 0, 1 >(), -lambdaY ); } - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -LinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "LinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h index 0dbc269883acadf563cf6fa6f5c28c185e24436f..97f9ec2be1b51137d3a891daabe4ea4213fdfd4a 100644 --- a/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h +++ b/src/TNL/Operators/diffusion/NonlinearDiffusion_impl.h @@ -23,23 +23,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename 
MeshIndex, @@ -100,23 +83,6 @@ setMatrixElements( const RealType& time, nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -177,23 +143,6 @@ setMatrixElements( const RealType& time, nonlinearDiffusionOperator.setMatrixElements( time, tau, mesh, index, entity, u, b, matrix ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index, - typename NonlinearDiffusionOperator > -String -NonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, NonlinearDiffusionOperator, Real, Index >:: -getType() -{ - return String( "NonlinearDiffusion< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + "," + - NonlinearDiffusionOperator::getType() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h index 6e99d2f053bd40774356518ceac3e4428738eb0c..0c8767981189ea02f87fdf5da89a4c48203bee0a 100644 --- a/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h +++ b/src/TNL/Operators/diffusion/OneSidedMeanCurvature.h @@ -55,14 +55,6 @@ class OneSidedMeanCurvature nonlinearity( nonlinearityOperator, nonlinearityBoundaryConditions, meshPointer ), nonlinearDiffusion( nonlinearity ){} - static String getType() - { - return String( "OneSidedMeanCurvature< " ) + - 
MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setRegularizationEpsilon( const RealType& eps ) { this->gradientNorm.setEps( eps ); diff --git a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h index b74abe0b06de5898e86679dcd537d9656cbfcc9b..9691ab32b169e3ff82cd48f04827611859c3e1ef 100644 --- a/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h +++ b/src/TNL/Operators/diffusion/OneSidedNonlinearDiffusion.h @@ -52,15 +52,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > @@ -146,15 +137,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 2, MeshReal, Device, MeshIndex > OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > @@ -255,15 +237,6 @@ class OneSidedNonlinearDiffusion< Meshes::Grid< 3, MeshReal, Device, MeshIndex > OneSidedNonlinearDiffusion( const Nonlinearity& nonlinearity ) : nonlinearity( nonlinearity ){} - - static String getType() - { - return String( "OneSidedNonlinearDiffusion< " ) + - MeshType::getType() + ", " + - Nonlinearity::getType() + "," + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } template< typename MeshFunction, typename MeshEntity > diff --git 
a/src/TNL/Operators/euler/fvm/LaxFridrichs.h b/src/TNL/Operators/euler/fvm/LaxFridrichs.h index aaf44f5214def6e0cea92f0c49e1534f0915a1cf..10d1c7995d8564d0f3d9dbea087f8bf6200be620 100644 --- a/src/TNL/Operators/euler/fvm/LaxFridrichs.h +++ b/src/TNL/Operators/euler/fvm/LaxFridrichs.h @@ -42,8 +42,6 @@ class LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, Pressu LaxFridrichs(); - static String getType(); - void getExplicitUpdate( const IndexType centralVolume, RealType& rho_t, RealType& rho_u1_t, diff --git a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h index aa9c8059a51b0925ce3474a1169e64ff6cd8e0b1..963ef76019d4a8e26980b829941025a6e10f30df 100644 --- a/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h +++ b/src/TNL/Operators/euler/fvm/LaxFridrichs_impl.h @@ -27,19 +27,6 @@ LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, { } -template< typename Real, - typename Device, - typename Index, - typename PressureGradient, - template< int, typename, typename, typename > class GridGeometry > -String LaxFridrichs< Meshes::Grid< 2, Real, Device, Index, GridGeometry >, - PressureGradient > :: getType() -{ - return String( "LaxFridrichs< " ) + - Meshes::Grid< 2, Real, Device, Index, GridGeometry > :: getType() + ", " + - PressureGradient :: getType() + " >"; -} - template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Operators/fdm/BackwardFiniteDifference.h b/src/TNL/Operators/fdm/BackwardFiniteDifference.h index cceaa807f49961d3efe1cfda844c13ff3daa435e..16282c73e75f9cfa2f7538bc8dd2cefdb1f3a096 100644 --- a/src/TNL/Operators/fdm/BackwardFiniteDifference.h +++ b/src/TNL/Operators/fdm/BackwardFiniteDifference.h @@ -50,17 +50,6 @@ class BackwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, M static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "BackwardFiniteDifference< " ) + - 
MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/fdm/CentralFiniteDifference.h b/src/TNL/Operators/fdm/CentralFiniteDifference.h index feecc62e77800c94b63417aad765b0c404d58a22..51a645be12d42812cdbf9622f5e5d7aa40b8f581 100644 --- a/src/TNL/Operators/fdm/CentralFiniteDifference.h +++ b/src/TNL/Operators/fdm/CentralFiniteDifference.h @@ -50,18 +50,6 @@ class CentralFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me //static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "CentralFiniteDifference< " ) + - MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/fdm/ExactDifference.h b/src/TNL/Operators/fdm/ExactDifference.h index 5efffc1b8436a529a4478561fde9326bc323787c..58c6936445ed2fe847672dc8aa5285e46cedac43 100644 --- a/src/TNL/Operators/fdm/ExactDifference.h +++ b/src/TNL/Operators/fdm/ExactDifference.h @@ -22,15 +22,6 @@ class ExactDifference { public: - static String getType() - { - return String( "ExactDifference< " ) + - String( Dimension ) + ", " + - String( XDerivative ) + ", " + - String( YDerivative ) + ", " + - String( ZDerivative ) + " >"; - } - template< typename Function > __cuda_callable__ typename Function::RealType operator()( diff --git a/src/TNL/Operators/fdm/ForwardFiniteDifference.h b/src/TNL/Operators/fdm/ForwardFiniteDifference.h index 
53602afec21eb7bc1dc416c7647306297895643e..bbfe29bc10f9eb48ae011917788cdadf0444cc1a 100644 --- a/src/TNL/Operators/fdm/ForwardFiniteDifference.h +++ b/src/TNL/Operators/fdm/ForwardFiniteDifference.h @@ -51,18 +51,6 @@ class ForwardFiniteDifference< Meshes::Grid< Dimension, MeshReal, MeshDevice, Me static constexpr int getMeshDimension() { return Dimension; } - static String getType() - { - return String( "ForwardFiniteDifference< " ) + - MeshType::getType() + ", " + - String( XDifference ) + ", " + - String( YDifference ) + ", " + - String( ZDifference ) + ", " + - TNL::getType< RealType >() + ", " + - TNL::getType< IndexType >() + " >"; - } - - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ inline Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/CoFVMGradientNorm.h b/src/TNL/Operators/geometric/CoFVMGradientNorm.h index 2af779a11fdb2664bdfb636425dc8218e09199c7..37fabed7d4934be55e1327c0482de5dc3620a9bf 100644 --- a/src/TNL/Operators/geometric/CoFVMGradientNorm.h +++ b/src/TNL/Operators/geometric/CoFVMGradientNorm.h @@ -58,15 +58,6 @@ class CoFVMGradientNorm< Meshes::Grid< MeshDimension, MeshReal, Device, MeshInde : BaseType( outerOperator, innerOperator, mesh ) {} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", " + - String( MeshDimension ) + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - void setEps( const RealType& eps ) { this->getInnerOperator().setEps( eps ); @@ -100,14 +91,6 @@ class CoFVMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, 0, Real, CoFVMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 0, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -160,16 +143,6 @@ class 
CoFVMGradientNorm< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, 1, Real CoFVMGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 1, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -278,14 +251,6 @@ class CoFVMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, 2, Real CoFVMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "CoFVMGradientNorm< " ) + - MeshType::getType() + ", 2, " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/ExactGradientNorm.h b/src/TNL/Operators/geometric/ExactGradientNorm.h index cf7e3384820be1c2e8628fe655211037c6dde1dd..121e0181b7df9f309c6087428a4f2e2d8ff8790d 100644 --- a/src/TNL/Operators/geometric/ExactGradientNorm.h +++ b/src/TNL/Operators/geometric/ExactGradientNorm.h @@ -30,11 +30,6 @@ class ExactGradientNorm< 1, Real > { public: - static String getType() - { - return "ExactGradientNorm< 1 >"; - } - ExactGradientNorm() : epsilonSquare( 0.0 ){}; @@ -98,11 +93,6 @@ class ExactGradientNorm< 2, Real > { public: - static String getType() - { - return "ExactGradientNorm< 2 >"; - } - ExactGradientNorm() : epsilonSquare( 0.0 ){}; @@ -172,11 +162,6 @@ class ExactGradientNorm< 3, Real > { public: - static String getType() - { - return "ExactGradientNorm< 3 >"; - } - ExactGradientNorm() : epsilonSquare( 0.0 ){}; diff --git a/src/TNL/Operators/geometric/FDMGradientNorm.h b/src/TNL/Operators/geometric/FDMGradientNorm.h index a5eb4536317a0ff5258a681585f67557ff029d59..f42216a433e24d4be7dd25ff3beacb476612c26a 100644 --- a/src/TNL/Operators/geometric/FDMGradientNorm.h +++ 
b/src/TNL/Operators/geometric/FDMGradientNorm.h @@ -50,14 +50,6 @@ class FDMGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Difference FDMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -107,16 +99,6 @@ class FDMGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Difference FDMGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -173,14 +155,6 @@ class FDMGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Differenc FDMGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "FDMGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h index 2d86167b1c1ee3a9466635c2605931248d80eb56..a1624b4089092a6402ff03e772c9de9ad24533e1 100644 --- a/src/TNL/Operators/geometric/TwoSidedGradientNorm.h +++ b/src/TNL/Operators/geometric/TwoSidedGradientNorm.h @@ -46,14 +46,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Real, TwoSidedGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - 
template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -99,16 +91,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Real, TwoSidedGradientNorm() : epsSquare( 0.0 ){} - - static String getType() - { - return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -162,14 +144,6 @@ class TwoSidedGradientNorm< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real TwoSidedGradientNorm() : epsSquare( 0.0 ){} - static String getType() - { - return String( "TwoSidedGradientNorm< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; - } - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h index 7f145198f049c2062e0e53d33b215e0fe8c0a3b8..bfe41697f3d7a7bf21b0ac9dfcd7ac7ed0a57f13 100644 --- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h +++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ.h @@ -42,8 +42,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -96,8 +94,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -149,8 +145,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - 
static String getType(); - template< typename Vector > IndexType bind( Vector& u) { return 0; } @@ -202,8 +196,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename Vector > Index bind( Vector& u); @@ -255,8 +247,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; typedef SharedVector< RealType, DeviceType, IndexType > DofVectorType; - - static String getType(); template< typename Vector > Index bind( Vector& u); @@ -309,8 +299,6 @@ class tnlFiniteVolumeOperatorQ< Meshes::Grid< 3,MeshReal, Device, MeshIndex >, R typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; - - static String getType(); template< typename Vector > Index bind( Vector& u); diff --git a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h index 0fae70006b3b9f69c8157cfcf2ad5538489b8f1d..184f1955d420b6b033128f484161aad3643a0750 100644 --- a/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h +++ b/src/TNL/Operators/operator-Q/tnlFiniteVolumeOperatorQ_impl.h @@ -16,36 +16,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + 
- TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -165,36 +135,6 @@ operator()( return 0.0; } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -372,36 +312,6 @@ operator()( const MeshType& mesh, return q.getElement( entity.getIndex() ); } -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 0 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 0 >"; -} - -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlFiniteVolumeOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index, 1 >:: -getType() -{ - return String( "tnlFiniteVolumeOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + ", 1 >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git 
a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h index a96d22f5134029fb9686150713696da47ff05bfc..aff3917c31c05476ecb325627a0f002aa0a3bf44 100644 --- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h +++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ.h @@ -37,8 +37,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 1,MeshReal, Device, MeshIndex >, Re typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -74,8 +72,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 2,MeshReal, Device, MeshIndex >, Re typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, @@ -112,8 +108,6 @@ class tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, R typedef Device DeviceType; typedef Index IndexType; - static String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h index 21f5e44f08ec29fe365de11cfcbb5fb898f9af26..6291e0a5314a78765f13c11d9ae748281c5c6d0b 100644 --- a/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h +++ b/src/TNL/Operators/operator-Q/tnlOneSideDiffOperatorQ_impl.h @@ -16,21 +16,6 @@ namespace TNL { namespace Operators { -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 1, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; 
-} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -92,21 +77,6 @@ getValueStriped( const MeshFunction& u, /*** * 2D */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 2, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, @@ -177,21 +147,6 @@ getValueStriped( const MeshFunction& u, /*** * 3D */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -String -tnlOneSideDiffOperatorQ< Meshes::Grid< 3, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{ - return String( "tnlOneSideDiffOperatorQ< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h index 33a20e255ce950bfa9714504185c1f236ad4abed..f3a73c88a40c3dd8b4f99e6cb1fbc602009a5124 100644 --- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h +++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature.h @@ -29,8 +29,6 @@ class ExactOperatorCurvature< OperatorQ, 1 > enum { Dimension = 1 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, @@ -46,8 +44,6 @@ class ExactOperatorCurvature< ExactOperatorQ, 2 > enum { Dimension = 2 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename 
Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, @@ -62,8 +58,6 @@ class ExactOperatorCurvature< ExactOperatorQ, 3 > enum { Dimension = 3 }; - static String getType(); - template< int XDiffOrder = 0, int YDiffOrder = 0, int ZDiffOrder = 0, typename Function, typename Point, typename Real = typename Point::RealType > __cuda_callable__ static Real getValue( const Function& function, diff --git a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h index dfc03a72799708b3e70741f8cbd4a354b8b076d6..8408cfe17c0e74dce6afa0550019304b018eaa89 100644 --- a/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h +++ b/src/TNL/Operators/operator-curvature/ExactOperatorCurvature_impl.h @@ -15,14 +15,6 @@ namespace TNL { namespace Operators { -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 1 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",1 >"; -} - template< typename OperatorQ > template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real > __cuda_callable__ @@ -41,14 +33,6 @@ getValue( const Function& function, return 0; } -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 2 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",2 >"; -} - template< int XDiffOrder, int YDiffOrder, int ZDiffOrder, typename Function, typename Point, typename Real > __cuda_callable__ Real @@ -68,13 +52,5 @@ getValue( const Function& function, return 0; } -template< typename ExactOperatorQ > -String -ExactOperatorCurvature< ExactOperatorQ, 3 >:: -getType() -{ - return "ExactOperatorCurvature< " + ExactOperatorQ::getType() + ",3 >"; -} - } // namespace Operators } // namespace TNL diff --git a/src/TNL/Pointers/DevicePointer.h 
b/src/TNL/Pointers/DevicePointer.h index b0c0a934fa0dee01ebe4bb2d93abe3e6d0d36b68..5276c3ed465938e7e7fcdfde2885dc8986cac3b5 100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -15,8 +15,10 @@ #include <TNL/Allocators/Default.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> #include <TNL/Pointers/SmartPointer.h> +#include <TNL/Pointers/SmartPointersRegister.h> +#include <TNL/TypeInfo.h> +#include <TNL/Cuda/MemoryHelpers.h> #include <cstring> // std::memcpy, std::memcmp @@ -405,7 +407,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer ~DevicePointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -422,10 +424,10 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer this->pointer = &obj; this->pd = new PointerData(); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( *this->pointer ); + this->cuda_pointer = Cuda::passToDevice( *this->pointer ); // set last-sync state this->set_last_sync_state(); - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } @@ -456,7 +458,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer delete this->pd; this->pd = nullptr; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); } } } @@ -470,288 +472,9 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer Object* cuda_pointer; }; -/**** - * Specialization for MIC - */ - -#ifdef HAVE_MIC -template< typename Object > -class DevicePointer< Object, Devices::MIC > : public SmartPointer -{ - private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. 
- // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. - template< typename Object_ > - using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value && - std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; - - // friend class will be needed for templated assignment operators - template< typename Object_, typename Device_ > - friend class DevicePointer; - - public: - - typedef Object ObjectType; - typedef Devices::MIC DeviceType; - - explicit DevicePointer( ObjectType& obj ) - : pointer( nullptr ), - pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( obj ); - } - - // this is needed only to avoid the default compiler-generated constructor - DevicePointer( const DevicePointer& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - DevicePointer( const DevicePointer< Object_, DeviceType >& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // this is needed only to avoid the default compiler-generated constructor - DevicePointer( DevicePointer&& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pointer = nullptr; - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - DevicePointer( DevicePointer< Object_, DeviceType >&& pointer ) - : pointer( pointer.pointer ), - pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pointer = nullptr; - pointer.pd = 
nullptr; - pointer.mic_pointer = nullptr; - } - - const Object* operator->() const - { - return this->pointer; - } - - Object* operator->() - { - this->pd->maybe_modified = true; - return this->pointer; - } - - const Object& operator *() const - { - return *( this->pointer ); - } - - Object& operator *() - { - this->pd->maybe_modified = true; - return *( this->pointer ); - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - __cuda_callable__ - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - TNL_ASSERT( this->mic_pointer, ); - if( std::is_same< Device, Devices::Host >::value ) - return *( this->pointer ); - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - template< typename Device = Devices::Host > - __cuda_callable__ - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - TNL_ASSERT( this->mic_pointer, ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return *( this->pointer ); - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - // this is needed only to avoid the default compiler-generated operator - const DevicePointer& operator=( const DevicePointer& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - this->pd->counter += 1; - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const DevicePointer& operator=( const DevicePointer< Object_, DeviceType >& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - this->pd->counter += 1; - return *this; - } - - // this is needed only to avoid the default compiler-generated operator - const DevicePointer& operator=( DevicePointer&& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pointer = nullptr; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const DevicePointer& operator=( DevicePointer< Object_, DeviceType >&& ptr ) - { - this->free(); - this->pointer = ptr.pointer; - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pointer = nullptr; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - bool synchronize() - { - if( ! 
this->pd ) - return true; - if( this->modified() ) - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->mic_pointer, ); - Devices::MIC::CopyToMIC((void*) this->mic_pointer, (void*) this->pointer, sizeof( ObjectType )); - this->set_last_sync_state(); - return true; - } - return true; - - } - - ~DevicePointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - char data_image[ sizeof(Object) ]; - int counter = 1; - bool maybe_modified = false; - }; - - bool allocate( ObjectType& obj ) - { - this->pointer = &obj; - this->pd = new PointerData(); - if( ! this->pd ) - return false; - // pass to device - this->mic_pointer = Allocators:::MIC< ObjectType >().allocate(1); - if( ! this->mic_pointer ) - return false; - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*)this->pointer,sizeof(ObjectType)); - - // set last-sync state - this->set_last_sync_state(); - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - std::memcpy( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT( this->pointer, ); - TNL_ASSERT( this->pd, ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) this->pointer, sizeof( Object ) ) != 0; - } - - void free() - { - if( this->pd ) - { - if( ! 
--this->pd->counter ) - { - delete this->pd; - this->pd = nullptr; - if( this->mic_pointer ) - Allocators:::MIC< ObjectType >().deallocate(this->mic_pointer, 1); - } - } - } - - Object* pointer; - - PointerData* pd; - - // mic_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device - Object* mic_pointer; -}; -#endif - } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > @@ -761,8 +484,8 @@ struct Formatter< Pointers::DevicePointer< Object, Device > > printToString( const Pointers::DevicePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(DevicePointer< " << Object::getType() << ", " << Device::getDeviceType() - << " > object at " << &value << ")"; + ss << "(" + getType< Pointers::DevicePointer< Object, Device > >() + << " object at " << &value << ")"; return ss.str(); } }; diff --git a/src/TNL/Pointers/SharedPointer.h b/src/TNL/Pointers/SharedPointer.h index e6908e47953b330b612ea9ec8a2421d8c11bc8a9..93f63f807c5038795c53cc0c5182571ab2d8a9c4 100644 --- a/src/TNL/Pointers/SharedPointer.h +++ b/src/TNL/Pointers/SharedPointer.h @@ -15,28 +15,10 @@ #include <cstring> #include <type_traits> #include <TNL/Assert.h> +#include <TNL/TypeInfo.h> //#define TNL_DEBUG_SHARED_POINTERS -#ifdef TNL_DEBUG_SHARED_POINTERS - #include <typeinfo> - #include <cxxabi.h> - #include <iostream> - #include <string> - #include <memory> - #include <cstdlib> - - inline - std::string demangle(const char* mangled) - { - int status; - std::unique_ptr<char[], void (*)(void*)> result( - abi::__cxa_demangle(mangled, 0, 0, &status), std::free); - return result.get() ? 
std::string(result.get()) : "error occurred"; - } -#endif - - namespace TNL { namespace Pointers { @@ -49,7 +31,7 @@ class SharedPointer } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > @@ -59,7 +41,7 @@ struct Formatter< Pointers::SharedPointer< Object, Device > > printToString( const Pointers::SharedPointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(SharedPointer< " << Object::getType() << ", " << Device::getDeviceType() + ss << "(" + getType< Pointers::SharedPointer< Object, Device > >() << " > object at " << &value << ")"; return ss.str(); } @@ -72,4 +54,3 @@ struct Formatter< Pointers::SharedPointer< Object, Device > > #include <TNL/Pointers/SharedPointerHost.h> #include <TNL/Pointers/SharedPointerCuda.h> -#include <TNL/Pointers/SharedPointerMic.h> diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 2cf1b297f8f1dbdbbb95d7bf3630df8e48242988..54dd4ee3c71c7eb461de7d8c906fd42dc96af1c6 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -16,32 +16,13 @@ #include <TNL/Devices/Cuda.h> #include <TNL/Pointers/SmartPointer.h> +#include <TNL/Pointers/SmartPointersRegister.h> +#include <TNL/Cuda/MemoryHelpers.h> #include <cstring> // std::memcpy, std::memcmp #include <cstddef> // std::nullptr_t #include <algorithm> // swap -//#define TNL_DEBUG_SHARED_POINTERS - -#ifdef TNL_DEBUG_SHARED_POINTERS - #include <typeinfo> - #include <cxxabi.h> - #include <iostream> - #include <string> - #include <memory> - #include <cstdlib> - - inline - std::string demangle(const char* mangled) - { - int status; - std::unique_ptr<char[], void (*)(void*)> result( - abi::__cxa_demangle(mangled, 0, 0, &status), std::free); - return result.get() ? 
std::string(result.get()) : "error occurred"; - } -#endif - - namespace TNL { namespace Pointers { @@ -78,7 +59,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer : pd( nullptr ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; #endif this->allocate( args... ); } @@ -119,7 +100,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->counter ) return this->allocate( args... ); @@ -377,7 +358,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->pd ) return this->allocate( args... 
); @@ -478,7 +459,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd != nullptr ) this->pd->counter += 1; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -494,7 +475,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd != nullptr ) this->pd->counter += 1; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -508,7 +489,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ptr.pd = nullptr; ptr.cuda_pointer = nullptr; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -524,7 +505,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ptr.pd = nullptr; ptr.cuda_pointer = nullptr; #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; #endif return *this; } @@ -537,7 +518,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->modified() ) { 
#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl; + std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << getType< ObjectType >() << std::endl; std::cerr << " ( " << sizeof( Object ) << " bytes, CUDA adress " << this->cuda_pointer << " )" << std::endl; #endif TNL_ASSERT( this->cuda_pointer, ); @@ -566,7 +547,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer ~SharedPointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -591,13 +572,13 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer { this->pd = new PointerData( args... ); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data ); + this->cuda_pointer = Cuda::passToDevice( this->pd->data ); // set last-sync state this->set_last_sync_state(); #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; + std::cerr << "Created shared pointer to " << getType< ObjectType >() << " (cuda_pointer = " << this->cuda_pointer << ")" << std::endl; #endif - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } @@ -622,14 +603,14 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer if( this->pd ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", cuda_pointer = " << this->cuda_pointer << ", type: " << getType< ObjectType >() << std::endl; #endif if( ! 
--this->pd->counter ) { delete this->pd; this->pd = nullptr; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); #ifdef TNL_DEBUG_SHARED_POINTERS std::cerr << "...deleted data." << std::endl; #endif diff --git a/src/TNL/Pointers/SharedPointerHost.h b/src/TNL/Pointers/SharedPointerHost.h index 48d83c93890a12ef2b4d0dfb7a466490578f24b9..39a6d4da4a2b8ab8b964110173d1716fade1ac71 100644 --- a/src/TNL/Pointers/SharedPointerHost.h +++ b/src/TNL/Pointers/SharedPointerHost.h @@ -15,7 +15,7 @@ #include "SharedPointer.h" #include <TNL/Devices/Host.h> -#include <TNL/Devices/CudaCallable.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Pointers/SmartPointer.h> #include <cstddef> // std::nullptr_t @@ -54,7 +54,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer : pd( nullptr ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Creating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Creating shared pointer to " << getType< ObjectType >() << std::endl; #endif this->allocate( args... ); } @@ -95,7 +95,7 @@ class SharedPointer< Object, Devices::Host > : public SmartPointer bool recreate( Args... args ) { #ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; + std::cerr << "Recreating shared pointer to " << getType< ObjectType >() << std::endl; #endif if( ! this->counter ) return this->allocate( args... 
); diff --git a/src/TNL/Pointers/SharedPointerMic.h b/src/TNL/Pointers/SharedPointerMic.h deleted file mode 100644 index 0c2958b4ad7c6552f58363c98dca5104908f04cc..0000000000000000000000000000000000000000 --- a/src/TNL/Pointers/SharedPointerMic.h +++ /dev/null @@ -1,373 +0,0 @@ -/*************************************************************************** - SharedPointerMic.h - description - ------------------- - begin : Aug 22, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Tomas Oberhuber, Jakub Klinkovsky - -#pragma once - -#include "SharedPointer.h" - -#include <TNL/Allocators/Default.h> -#include <TNL/Devices/MIC.h> -#include <TNL/Pointers/SmartPointer.h> - -#include <cstring> // std::memcpy, std::memcmp -#include <cstddef> // std::nullptr_t -#include <algorithm> // swap - -namespace TNL { -namespace Pointers { - -#ifdef HAVE_MIC -template< typename Object> -class SharedPointer< Object, Devices::MIC > : public SmartPointer -{ - private: - // Convenient template alias for controlling the selection of copy- and - // move-constructors and assignment operators using SFINAE. - // The type Object_ is "enabled" iff Object_ and Object are not the same, - // but after removing const and volatile qualifiers they are the same. - template< typename Object_ > - using Enabler = std::enable_if< ! std::is_same< Object_, Object >::value && - std::is_same< typename std::remove_cv< Object >::type, Object_ >::value >; - - // friend class will be needed for templated assignment operators - template< typename Object_, typename Device_> - friend class SharedPointer; - - public: - - using ObjectType = Object; - using DeviceType = Devices::MIC; - - SharedPointer( std::nullptr_t ) - : pd( nullptr ), - mic_pointer( nullptr ) - {} - - template< typename... Args > - explicit SharedPointer( Args... 
args ) - : pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( args... ); - } - - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( const SharedPointer& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - SharedPointer( const SharedPointer< Object_, DeviceType >& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - this->pd->counter += 1; - } - - // this is needed only to avoid the default compiler-generated constructor - SharedPointer( SharedPointer&& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - // conditional constructor for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - SharedPointer( SharedPointer< Object_, DeviceType >&& pointer ) - : pd( (PointerData*) pointer.pd ), - mic_pointer( pointer.mic_pointer ) - { - pointer.pd = nullptr; - pointer.mic_pointer = nullptr; - } - - template< typename... Args > - bool recreate( Args... args ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Recreating shared pointer to " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - if( ! this->pd ) - return this->allocate( args... ); - - if( this->pd->counter == 1 ) - { - /**** - * The object is not shared -> recreate it in-place, without reallocation - */ - this->pd->data.~Object(); - new ( &this->pd->data ) Object( args... ); - Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - - // free will just decrement the counter - this->free(); - - return this->allocate( args... 
); - } - - const Object* operator->() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return &this->pd->data; - } - - Object* operator->() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return &this->pd->data; - } - - const Object& operator *() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; - } - - Object& operator *() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return this->pd->data; - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - __cuda_callable__ - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - return this->pd->data; - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - - } - - template< typename Device = Devices::Host > - __cuda_callable__ - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return this->pd->data; - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - - } - - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( const SharedPointer& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - if( this->pd != nullptr ) - this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // conditional operator for non-const -> const data - template< typename Object_, - typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( const SharedPointer< Object_, DeviceType >& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - if( this->pd != nullptr ) - this->pd->counter += 1; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Copy-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // this is needed only to avoid the default compiler-generated operator - const SharedPointer& operator=( SharedPointer&& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - // conditional operator for non-const -> const data - template< typename 
Object_, - typename = typename Enabler< Object_ >::type > - const SharedPointer& operator=( SharedPointer< Object_, DeviceType >&& ptr ) - { - this->free(); - this->pd = (PointerData*) ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Move-assigned shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - return *this; - } - - bool synchronize() - { - if( ! this->pd ) - return true; - - if( this->modified() ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Synchronizing shared pointer: counter = " << this->pd->counter << ", type: " << demangle(typeid(Object).name()) << std::endl; - std::cerr << " ( " << sizeof( Object ) << " bytes, MIC adress " << this->mic_pointer << " )" << std::endl; -#endif - TNL_ASSERT( this->mic_pointer, ); - - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - return false; //?? - } - - void clear() - { - this->free(); - } - - void swap( SharedPointer& ptr2 ) - { - std::swap( this->pd, ptr2.pd ); - std::swap( this->mic_pointer, ptr2.mic_pointer ); - } - - ~SharedPointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - Object data; - uint8_t data_image[ sizeof(Object) ]; - int counter; - bool maybe_modified; - - template< typename... Args > - explicit PointerData( Args... args ) - : data( args... ), - counter( 1 ), - maybe_modified( false ) - {} - }; - - template< typename... Args > - bool allocate( Args... args ) - { - this->pd = new PointerData( args... ); - if( ! this->pd ) - return false; - - mic_pointer = Allocators::MIC< Object >().allocate(1); - Devices::MIC::CopyToMIC((void*)this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - - if( ! 
this->mic_pointer ) - return false; - // set last-sync state - this->set_last_sync_state(); -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Created shared pointer to " << demangle(typeid(ObjectType).name()) << " (mic_pointer = " << this->mic_pointer << ")" << std::endl; -#endif - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( Object ) ) != 0; - } - - void free() - { - if( this->pd ) - { -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "Freeing shared pointer: counter = " << this->pd->counter << ", mic_pointer = " << this->mic_pointer << ", type: " << demangle(typeid(ObjectType).name()) << std::endl; -#endif - if( ! --this->pd->counter ) - { - delete this->pd; - this->pd = nullptr; - if( this->mic_pointer ) - { - Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1); - mic_pointer=nullptr; - } -#ifdef TNL_DEBUG_SHARED_POINTERS - std::cerr << "...deleted data." 
<< std::endl; -#endif - } - } - } - - PointerData* pd; - - // cuda_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device -- NevÃÂm zda to platàpro MIC, asi jo - Object* mic_pointer; -}; -#endif - -} // namespace Pointers -} // namespace TNL diff --git a/src/TNL/Pointers/SmartPointersRegister.h b/src/TNL/Pointers/SmartPointersRegister.h index ad716b9c036ee011dca583b2557247ce0d110453..7f261a28e203a1f0ebfb47729742405e26b8f20d 100644 --- a/src/TNL/Pointers/SmartPointersRegister.h +++ b/src/TNL/Pointers/SmartPointersRegister.h @@ -2,7 +2,7 @@ SmartPointersRegister.h - description ------------------- begin : Apr 29, 2016 - copyright : (C) 2016 by Tomas Oberhuber + copyright : (C) 2016 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ @@ -12,24 +12,45 @@ #include <unordered_set> #include <unordered_map> + #include <TNL/Pointers/SmartPointer.h> -#include <TNL/Assert.h> +#include <TNL/Timer.h> +#include <TNL/Cuda/DeviceInfo.h> +#include <TNL/Devices/Sequential.h> +#include <TNL/Devices/Host.h> namespace TNL { namespace Pointers { +// Since TNL currently supports only execution on host (which does not need +// to register and synchronize smart pointers) and CUDA GPU's, the smart +// pointers register is implemented only for CUDA. If more execution types +// which need to register smart pointers are implemented in the future, this +// should beome a class template specialization. class SmartPointersRegister { public: - void insert( SmartPointer* pointer, int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. 
+ */ + void insert( SmartPointer* pointer, int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); pointersOnDevices[ deviceId ].insert( pointer ); } - void remove( SmartPointer* pointer, int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. + */ + void remove( SmartPointer* pointer, int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); try { pointersOnDevices.at( deviceId ).erase( pointer ); } @@ -41,8 +62,14 @@ class SmartPointersRegister } } - bool synchronizeDevice( int deviceId ) + /** + * Negative deviceId means that \ref Cuda::DeviceInfo::getActiveDevice will be + * called to get the device ID. + */ + bool synchronizeDevice( int deviceId = -1 ) { + if( deviceId < 0 ) + deviceId = Cuda::DeviceInfo::getActiveDevice(); try { const auto & set = pointersOnDevices.at( deviceId ); for( auto&& it : set ) @@ -61,5 +88,38 @@ class SmartPointersRegister std::unordered_map< int, SetType > pointersOnDevices; }; + +// TODO: Device -> Allocator (in all smart pointers) +template< typename Device > +SmartPointersRegister& getSmartPointersRegister() +{ + static SmartPointersRegister reg; + return reg; +} + +template< typename Device > +Timer& getSmartPointersSynchronizationTimer() +{ + static Timer timer; + return timer; +} + +/** + * Negative deviceId means that the ID of the currently active device will be + * determined automatically. 
+ */ +template< typename Device > +bool synchronizeSmartPointersOnDevice( int deviceId = -1 ) +{ + // TODO: better way to skip synchronization of host-only smart pointers + if( std::is_same< Device, Devices::Sequential >::value || std::is_same< Device, Devices::Host >::value ) + return true; + + getSmartPointersSynchronizationTimer< Device >().start(); + bool b = getSmartPointersRegister< Device >().synchronizeDevice( deviceId ); + getSmartPointersSynchronizationTimer< Device >().stop(); + return b; +} + } // namespace Pointers } // namespace TNL diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index cfb7b543fc3e94858ad5c34d4bf8e8c0faf85462..071de4d51132fa6b71e0e6b86ab16acd3c8269c8 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -15,8 +15,9 @@ #include <TNL/Allocators/Default.h> #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> #include <TNL/Pointers/SmartPointer.h> +#include <TNL/Pointers/SmartPointersRegister.h> +#include <TNL/Cuda/MemoryHelpers.h> #include <cstring> // std::memcpy, std::memcmp #include <cstddef> // std::nullptr_t @@ -250,7 +251,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer ~UniquePointer() { this->free(); - Devices::Cuda::removeSmartPointer( this ); + getSmartPointersRegister< DeviceType >().remove( this ); } protected: @@ -273,10 +274,10 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer { this->pd = new PointerData( args... 
); // pass to device - this->cuda_pointer = Devices::Cuda::passToDevice( this->pd->data ); + this->cuda_pointer = Cuda::passToDevice( this->pd->data ); // set last-sync state this->set_last_sync_state(); - Devices::Cuda::insertSmartPointer( this ); + getSmartPointersRegister< DeviceType >().insert( this ); return true; } @@ -301,7 +302,7 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer if( this->pd ) delete this->pd; if( this->cuda_pointer ) - Devices::Cuda::freeFromDevice( this->cuda_pointer ); + Cuda::freeFromDevice( this->cuda_pointer ); } PointerData* pd; @@ -311,187 +312,9 @@ class UniquePointer< Object, Devices::Cuda > : public SmartPointer Object* cuda_pointer; }; -#ifdef HAVE_MIC -template< typename Object > -class UniquePointer< Object, Devices::MIC > : public SmartPointer -{ - public: - - typedef Object ObjectType; - typedef Devices::MIC DeviceType; - - UniquePointer( std::nullptr_t ) - : pd( nullptr ), - mic_pointer( nullptr ) - {} - - template< typename... Args > - explicit UniquePointer( const Args... args ) - : pd( nullptr ), - mic_pointer( nullptr ) - { - this->allocate( args... 
); - } - - const Object* operator->() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return &this->pd->data; - } - - Object* operator->() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return &this->pd->data; - } - - const Object& operator *() const - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - return this->pd->data; - } - - Object& operator *() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - this->pd->maybe_modified = true; - return this->pd->data; - } - - operator bool() - { - return this->pd; - } - - template< typename Device = Devices::Host > - const Object& getData() const - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." ); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - return this->pd->data; - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - template< typename Device = Devices::Host > - Object& modifyData() - { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::MIC >::value, "Only Devices::Host or Devices::MIC devices are accepted here." 
); - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - TNL_ASSERT_TRUE( this->mic_pointer, "Attempt to dereference a null pointer" ); - if( std::is_same< Device, Devices::Host >::value ) - { - this->pd->maybe_modified = true; - return this->pd->data; - } - if( std::is_same< Device, Devices::MIC >::value ) - return *( this->mic_pointer ); - } - - const UniquePointer& operator=( UniquePointer& ptr ) - { - this->free(); - this->pd = ptr.pd; - this->mic_pointer = ptr.mic_pointer; - ptr.pd = nullptr; - ptr.mic_pointer = nullptr; - return *this; - } - - const UniquePointer& operator=( UniquePointer&& ptr ) - { - return this->operator=( ptr ); - } - - bool synchronize() - { - if( ! this->pd ) - return true; - if( this->modified() ) - { - Devices::MIC::CopyToMIC(this->mic_pointer,(void*) &this->pd->data,sizeof(Object)); - this->set_last_sync_state(); - return true; - } - return true;//?? - } - - ~UniquePointer() - { - this->free(); - Devices::MIC::removeSmartPointer( this ); - } - - protected: - - struct PointerData - { - Object data; - char data_image[ sizeof(Object) ]; - bool maybe_modified; - - template< typename... Args > - explicit PointerData( Args... args ) - : data( args... ), - maybe_modified( false ) - {} - }; - - template< typename... Args > - bool allocate( Args... args ) - { - this->pd = new PointerData( args... ); - if( ! this->pd ) - return false; - // pass to device - this->mic_pointer = Allocators::MIC< Object >().allocate(1); - if( ! 
this->mic_pointer ) - return false; - Devices::MIC::CopyToMIC((void*)mic_pointer,(void*)&this->pd->data,sizeof(Object)); - // set last-sync state - this->set_last_sync_state(); - Devices::MIC::insertSmartPointer( this ); - return true; - } - - void set_last_sync_state() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - std::memcpy( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ); - this->pd->maybe_modified = false; - } - - bool modified() - { - TNL_ASSERT_TRUE( this->pd, "Attempt to dereference a null pointer" ); - // optimization: skip bitwise comparison if we're sure that the data is the same - if( ! this->pd->maybe_modified ) - return false; - return std::memcmp( (void*) &this->pd->data_image, (void*) &this->pd->data, sizeof( ObjectType ) ) != 0; - } - - void free() - { - if( this->pd ) - delete this->pd; - if( this->mic_pointer ) - Allocators:::MIC< ObjectType >().deallocate(mic_pointer, 1); - } - - PointerData* pd; - - // mic_pointer can't be part of PointerData structure, since we would be - // unable to dereference this-pd on the device - Object* mic_pointer; -}; -#endif - } // namespace Pointers -#if (!defined(NDEBUG)) && (!defined(HAVE_MIC)) +#ifndef NDEBUG namespace Assert { template< typename Object, typename Device > @@ -501,7 +324,7 @@ struct Formatter< Pointers::UniquePointer< Object, Device > > printToString( const Pointers::UniquePointer< Object, Device >& value ) { ::std::stringstream ss; - ss << "(UniquePointer< " << Object::getType() << ", " << Device::getDeviceType() + ss << "(" + getType< Pointers::UniquePointer< Object, Device > >() << " > object at " << &value << ")"; return ss.str(); } diff --git a/src/TNL/Problems/HeatEquationEocProblem.h b/src/TNL/Problems/HeatEquationEocProblem.h index 51990252ca731252a5c363870f7076fbe224363f..78dd640b353e302f3a97f8dd381db297e8b8483a 100644 --- a/src/TNL/Problems/HeatEquationEocProblem.h +++ b/src/TNL/Problems/HeatEquationEocProblem.h @@ -36,8 
+36,6 @@ class HeatEquationEocProblem : public HeatEquationProblem< Mesh, BoundaryConditi using typename BaseType::MeshPointer; - static String getType(); - bool setup( const Config::ParameterContainer& parameters, const String& prefix ); }; diff --git a/src/TNL/Problems/HeatEquationEocProblem_impl.h b/src/TNL/Problems/HeatEquationEocProblem_impl.h index ae062df74ec825f124961b5ff33f2223c32b7d54..f7c7aea5cd2a6604683346cb7a65db3b8be6ffab 100644 --- a/src/TNL/Problems/HeatEquationEocProblem_impl.h +++ b/src/TNL/Problems/HeatEquationEocProblem_impl.h @@ -20,19 +20,7 @@ #include "HeatEquationProblem.h" namespace TNL { -namespace Problems { - -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename Communicator, - typename DifferentialOperator > -String -HeatEquationEocProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "heatEquationEocSolver< " ) + Mesh :: getType() + " >"; -} +namespace Problems { template< typename Mesh, typename BoundaryCondition, diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h index cddd70746a295378450ab2b1cee16976587b0f83..26df28965ec42e855fd034de7dea748999381e67 100644 --- a/src/TNL/Problems/HeatEquationProblem.h +++ b/src/TNL/Problems/HeatEquationProblem.h @@ -62,8 +62,6 @@ class HeatEquationProblem : public PDEProblem< Mesh, typedef Communicator CommunicatorType; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h index 64b4a2ca91f34b385961808dc89a6909686da9c2..bc339e9b3ba56eb9e4d3499d4954be57cda7d864 100644 --- a/src/TNL/Problems/HeatEquationProblem_impl.h +++ b/src/TNL/Problems/HeatEquationProblem_impl.h @@ -27,18 +27,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename 
Communicator, - typename DifferentialOperator > -String -HeatEquationProblem< Mesh, BoundaryCondition, RightHandSide, Communicator, DifferentialOperator >:: -getType() -{ - return String( "HeatEquationProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h index e50afe7faf31327da9b84cda4661812280f469b6..7839dd8dda2e1e7115007126654e7249ceff23e3 100644 --- a/src/TNL/Problems/MeanCurvatureFlowEocProblem.h +++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem.h @@ -33,8 +33,6 @@ class MeanCurvatureFlowEocProblem : public MeanCurvatureFlowProblem< Mesh, Bound { public: - static String getType(); - bool setup( const Config::ParameterContainer& parameters ); }; diff --git a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h index 593028b3732dce7b4ead51af4e29feb8f85f5d56..71809f3cd4c7aeec164900aa5c217802a7d42435 100644 --- a/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h +++ b/src/TNL/Problems/MeanCurvatureFlowEocProblem_impl.h @@ -19,17 +19,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -MeanCurvatureFlowEocProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getType() -{ - return String( "HeatEquationEocProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, @@ -47,4 +36,4 @@ setup( const Config::ParameterContainer& parameters ) } } // namespace Problems -} // namespace TNL \ No newline at end of file +} // namespace TNL diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem.h b/src/TNL/Problems/MeanCurvatureFlowProblem.h index 45e87025bd246d6c124215f0ef365030445f8a2c..415216dcea3e31474ac84ff75be57a04acc6a4e7 100644 --- 
a/src/TNL/Problems/MeanCurvatureFlowProblem.h +++ b/src/TNL/Problems/MeanCurvatureFlowProblem.h @@ -54,8 +54,6 @@ class MeanCurvatureFlowProblem : public PDEProblem< Mesh, using typename BaseType::MeshDependentDataType; using typename BaseType::MeshDependentDataPointer; - static String getType(); - String getPrologHeader() const; void writeProlog( Logger& logger, diff --git a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h index 2cd7f9aaf123a32928b1d545b5390743972c4ec3..48807addfbe91837f55f47a4cb9fe60dafe3b023 100644 --- a/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h +++ b/src/TNL/Problems/MeanCurvatureFlowProblem_impl.h @@ -30,17 +30,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -String -MeanCurvatureFlowProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getType() -{ - return String( "tnlMeanCurvativeFlowProblem< " ) + Mesh :: getType() + " >"; -} - template< typename Mesh, typename BoundaryCondition, typename RightHandSide, diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h index 51e56d44aa47e35689a46543e37d3c23cc9f2a7b..69d95aaeee7e5db273940602b7f192c0b75b2591 100644 --- a/src/TNL/Problems/PDEProblem.h +++ b/src/TNL/Problems/PDEProblem.h @@ -50,8 +50,6 @@ class PDEProblem : public Problem< Real, Device, Index > * This means that the time stepper will be set from the command line arguments. 
*/ typedef void TimeStepper; - - static String getType(); String getPrologHeader() const; diff --git a/src/TNL/Problems/PDEProblem_impl.h b/src/TNL/Problems/PDEProblem_impl.h index 151f1e2ac553e212d2f7adf64c99c73fbad1bf3a..6a3aa63e6d82bce68b9f549b413d275504f137aa 100644 --- a/src/TNL/Problems/PDEProblem_impl.h +++ b/src/TNL/Problems/PDEProblem_impl.h @@ -16,22 +16,6 @@ namespace TNL { namespace Problems { -template< typename Mesh, - typename Communicator, - typename Real, - typename Device, - typename Index > -String -PDEProblem< Mesh, Communicator, Real, Device, Index >:: -getType() -{ - return String( "PDEProblem< " ) + - Mesh::getType() + ", " + - TNL::getType< Real >() + ", " + - Device::getDeviceType() + ", " + - TNL::getType< Index >() + " >"; -} - template< typename Mesh, typename Communicator, typename Real, diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h index e725eb67ffd75b148cf3368029f51f14703b4f50..1ea084f4f172ce3aa33bdcc7adb16888a3d65fdb 100644 --- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h +++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver.h @@ -34,8 +34,6 @@ class NavierStokesSolver NavierStokesSolver(); - static String getType(); - void setAdvectionScheme( AdvectionSchemeType& advection ); void setDiffusionScheme( DiffusionSchemeType& u1Viscosity, @@ -148,4 +146,4 @@ class NavierStokesSolver } // namespace TNL -#include <TNL/Solvers/cfd/navier-stokes/NavierStokesSolver_impl.h> \ No newline at end of file +#include <TNL/Solvers/cfd/navier-stokes/NavierStokesSolver_impl.h> diff --git a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h index a12ec5cb100f1fe16e9f089ff42bc4da671e6bd0..a266938862c6f253a2318e8a0bab7b27d2ec2059 100644 --- a/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h +++ b/src/TNL/Problems/cfd/navier-stokes/NavierStokesSolver_impl.h @@ -29,16 +29,6 
@@ NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::Navi { } -template< typename AdvectionScheme, - typename DiffusionScheme, - typename BoundaryConditions > -String NavierStokesSolver< AdvectionScheme, DiffusionScheme, BoundaryConditions >::getType() -{ - return String( "NavierStokesSolver< " ) + - AdvectionScheme::getType() + ", " + - DiffusionScheme::getType() + " >"; -} - template< typename AdvectionScheme, typename DiffusionScheme, typename BoundaryConditions > diff --git a/src/TNL/Solvers/BuildConfigTags.h b/src/TNL/Solvers/BuildConfigTags.h index 19bb42129563b48a7e1e1ba9baf8f107d25ee661..bcd4cdafcacff729b51b827348bcd7703f4bec21 100644 --- a/src/TNL/Solvers/BuildConfigTags.h +++ b/src/TNL/Solvers/BuildConfigTags.h @@ -27,10 +27,6 @@ template< typename ConfigTag, typename Device > struct ConfigTagDevice{ enum { e template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::Cuda >{ enum { enabled = false }; }; #endif -#ifndef HAVE_MIC -template< typename ConfigTag > struct ConfigTagDevice< ConfigTag, Devices::MIC >{ enum { enabled = false }; }; -#endif - /**** * All real types are enabled by default. 
*/ diff --git a/src/TNL/Solvers/Linear/BICGStab.h b/src/TNL/Solvers/Linear/BICGStab.h index 686d6f4503d2cfc7d73fd74482c670f56db9a793..2cede824ad00c4ea8b4cb2f270d86882f5bfcfe3 100644 --- a/src/TNL/Solvers/Linear/BICGStab.h +++ b/src/TNL/Solvers/Linear/BICGStab.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/BICGStabL.h b/src/TNL/Solvers/Linear/BICGStabL.h index a35962d54b900ae0a50dfe1f42ff04d9235fc3a8..f2481b588bc92722e9795fea2a91b8676e5a307d 100644 --- a/src/TNL/Solvers/Linear/BICGStabL.h +++ b/src/TNL/Solvers/Linear/BICGStabL.h @@ -65,8 +65,6 @@ public: using ConstVectorViewType = typename Base::ConstVectorViewType; using VectorType = typename Traits::VectorType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/BICGStabL_impl.h b/src/TNL/Solvers/Linear/BICGStabL_impl.h index 1f20d4a30e582d4565bcf15ccb4fdaa07c91947e..3f41e5115d0043b168bb9425a567bf02c97159b0 100644 --- a/src/TNL/Solvers/Linear/BICGStabL_impl.h +++ b/src/TNL/Solvers/Linear/BICGStabL_impl.h @@ -20,16 +20,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String -BICGStabL< Matrix >:: -getType() const -{ - return String( "BICGStabL< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void BICGStabL< Matrix >:: diff --git a/src/TNL/Solvers/Linear/BICGStab_impl.h b/src/TNL/Solvers/Linear/BICGStab_impl.h index 735358622bd9875e825019793e0237f488f20bb4..baa4b6363e712ec4156e7a4bc79bc6e32bcc031c 100644 --- a/src/TNL/Solvers/Linear/BICGStab_impl.h +++ b/src/TNL/Solvers/Linear/BICGStab_impl.h @@ -18,14 +18,6 @@ namespace TNL { namespace Solvers { namespace 
Linear { -template< typename Matrix > -String BICGStab< Matrix > :: getType() const -{ - return String( "BICGStab< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void BICGStab< Matrix >:: diff --git a/src/TNL/Solvers/Linear/CG.h b/src/TNL/Solvers/Linear/CG.h index b87caf24784affb9e425fd7ca7748134995d10e0..375db25cb9db4e4d9c9b4e253ba72ecbb1923f0f 100644 --- a/src/TNL/Solvers/Linear/CG.h +++ b/src/TNL/Solvers/Linear/CG.h @@ -30,8 +30,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; protected: diff --git a/src/TNL/Solvers/Linear/CG_impl.h b/src/TNL/Solvers/Linear/CG_impl.h index 07f8ea1979a691029a34fecfe1e4cf052d1a0f73..9c1b0458aab7fe0566767717826182c7f034bf02 100644 --- a/src/TNL/Solvers/Linear/CG_impl.h +++ b/src/TNL/Solvers/Linear/CG_impl.h @@ -16,14 +16,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String CG< Matrix > :: getType() const -{ - return String( "CG< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > bool CG< Matrix >:: diff --git a/src/TNL/Solvers/Linear/GMRES.h b/src/TNL/Solvers/Linear/GMRES.h index dd72e2832af81b65b9fdcc8d19090d669232bc62..f1a4b87328a630411c634bed6744494f76afea02 100644 --- a/src/TNL/Solvers/Linear/GMRES.h +++ b/src/TNL/Solvers/Linear/GMRES.h @@ -37,8 +37,6 @@ public: using ConstVectorViewType = typename Base::ConstVectorViewType; using VectorType = typename Traits::VectorType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); @@ -51,9 +49,9 @@ protected: // local vectors/views using ConstDeviceView = typename Traits::ConstLocalViewType; using DeviceView = typename Traits::LocalViewType; - using HostView = typename 
DeviceView::HostType; using DeviceVector = typename Traits::LocalVectorType; - using HostVector = typename DeviceVector::HostType; + using HostView = typename DeviceView::template Self< RealType, Devices::Host >; + using HostVector = typename DeviceVector::template Self< RealType, Devices::Host >;; enum class Variant { MGS, MGSR, CWY }; diff --git a/src/TNL/Solvers/Linear/GMRES_impl.h b/src/TNL/Solvers/Linear/GMRES_impl.h index 5d7942f841100221bb3d16c807179c17468e604b..d6cb8fdd095120c35cbf303a734ed7c5667bb79d 100644 --- a/src/TNL/Solvers/Linear/GMRES_impl.h +++ b/src/TNL/Solvers/Linear/GMRES_impl.h @@ -15,7 +15,7 @@ #include <type_traits> #include <cmath> -#include <TNL/Containers/Algorithms/Multireduction.h> +#include <TNL/Algorithms/Multireduction.h> #include <TNL/Matrices/MatrixOperations.h> #include "GMRES.h" @@ -24,16 +24,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String -GMRES< Matrix >:: -getType() const -{ - return String( "GMRES< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void GMRES< Matrix >:: @@ -390,7 +380,7 @@ hauseholder_generate( const int i, else y_i[ j ] = z[ j ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, size, kernel_truncation ); } else { ConstDeviceView z_local = Traits::getConstLocalView( z ); @@ -430,7 +420,7 @@ hauseholder_generate( const int i, const RealType* _y_i = Traits::getConstLocalView( y_i ).getData(); const IndexType ldSize = this->ldSize; auto fetch = [_Y, _y_i, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _y_i[ idx ]; }; - Containers::Algorithms::Multireduction< DeviceType >::reduce + Algorithms::Multireduction< DeviceType >::reduce ( (RealType) 0, fetch, std::plus<>{}, @@ -461,7 +451,7 @@ hauseholder_apply_trunc( HostView out, // The upper (m+1)x(m+1) submatrix of Y is 
duplicated in the YL buffer, // which resides on host and is broadcasted from rank 0 to all processes. HostView YL_i( &YL[ i * (restarting_max + 1) ], restarting_max + 1 ); - Containers::Algorithms::ArrayOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); + Algorithms::MultiDeviceMemoryOperations< Devices::Host, DeviceType >::copy( YL_i.getData(), Traits::getLocalView( y_i ).getData(), YL_i.getSize() ); // no-op if the problem is not distributed CommunicatorType::Bcast( YL_i.getData(), YL_i.getSize(), 0, Traits::getCommunicationGroup( *this->matrix ) ); @@ -476,7 +466,7 @@ hauseholder_apply_trunc( HostView out, } if( std::is_same< DeviceType, Devices::Cuda >::value ) { RealType host_z[ i + 1 ]; - Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); + Algorithms::MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy( host_z, Traits::getConstLocalView( z ).getData(), i + 1 ); for( int k = 0; k <= i; k++ ) out[ k ] = host_z[ k ] - YL_i[ k ] * aux; } @@ -530,7 +520,7 @@ hauseholder_cwy_transposed( VectorViewType z, const RealType* _w = Traits::getConstLocalView( w ).getData(); const IndexType ldSize = this->ldSize; auto fetch = [_Y, _w, ldSize] __cuda_callable__ ( IndexType idx, int k ) { return _Y[ idx + k * ldSize ] * _w[ idx ]; }; - Containers::Algorithms::Multireduction< DeviceType >::reduce + Algorithms::Multireduction< DeviceType >::reduce ( (RealType) 0, fetch, std::plus<>{}, diff --git a/src/TNL/Solvers/Linear/Jacobi.h b/src/TNL/Solvers/Linear/Jacobi.h index 5288726713525cbc5911d497d219ee266bd89243..e4e74d5dff6c7037278e0455303c6713ed26276f 100644 --- a/src/TNL/Solvers/Linear/Jacobi.h +++ b/src/TNL/Solvers/Linear/Jacobi.h @@ -29,11 +29,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const - { - 
return String( "Jacobi< " ) + this->matrix->getType() + ", " + this->preconditioner->getType() + " >"; - } - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ) { diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h index 25aa1cd7c7aff9d65c863a0f3a8e1e2f578ef1cb..f88e315ccf734a12ec20e53fb930016aa0330b36 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal.h @@ -38,11 +38,6 @@ public: virtual void solve( ConstVectorViewType b, VectorViewType x ) const override; - String getType() const - { - return String( "Diagonal" ); - } - protected: VectorType diagonal; }; @@ -67,11 +62,6 @@ public: virtual void solve( ConstVectorViewType b, VectorViewType x ) const override; - String getType() const - { - return String( "Diagonal" ); - } - protected: VectorType diagonal; }; diff --git a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h index de4b9f3f1ee84b668d4daf2a6515ddf102973d95..c9751fe4f89f60a6115947aede1279da8dfcdb5f 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Diagonal_impl.h @@ -14,7 +14,7 @@ #include "Diagonal.h" -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Solvers { @@ -39,7 +39,7 @@ update( const MatrixPointer& matrixPointer ) diag_view[ i ] = kernel_matrix->getElementFast( i, i ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } template< typename Matrix > @@ -54,7 +54,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const x[ i ] = b[ i ] / diag_view[ i ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, 
diagonal.getSize(), kernel ); } @@ -77,7 +77,7 @@ update( const MatrixPointer& matrixPointer ) diag_view[ i ] = kernel_matrix->getLocalMatrix().getElementFast( i, gi ); }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } template< typename Matrix, typename Communicator > @@ -94,7 +94,7 @@ solve( ConstVectorViewType b, VectorViewType x ) const x_view[ i ] = b_view[ i ] / diag_view[ i ]; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, diagonal.getSize(), kernel ); } } // namespace Preconditioners diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index 1fc2fa3fa69f964cb3486d6ee16dcf43fc8d3b9f..8a177df055c682ead7b7037aae4772ae2d1ef1ab 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -37,13 +37,7 @@ class ILU0_impl template< typename Matrix > class ILU0 : public ILU0_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType > -{ -public: - String getType() const - { - return String( "ILU0" ); - } -}; +{}; template< typename Matrix, typename Real, typename Index > class ILU0_impl< Matrix, Real, Devices::Host, Index > @@ -199,29 +193,6 @@ public: } }; -template< typename Matrix, typename Real, typename Index > -class ILU0_impl< Matrix, Real, Devices::MIC, Index > -: public Preconditioner< Matrix > -{ -public: - using RealType = Real; - using DeviceType = Devices::MIC; - using IndexType = Index; - using typename Preconditioner< Matrix >::VectorViewType; - using typename Preconditioner< Matrix >::ConstVectorViewType; - using typename Preconditioner< Matrix >::MatrixPointer; - - virtual void update( const MatrixPointer& matrixPointer ) override - { - throw Exceptions::NotImplementedError("Not 
Iplemented yet for MIC"); - } - - virtual void solve( ConstVectorViewType b, VectorViewType x ) const override - { - throw Exceptions::NotImplementedError("Not Iplemented yet for MIC"); - } -}; - } // namespace Preconditioners } // namespace Linear } // namespace Solvers diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h index 626469920ff9e08d7f935e13017086b7cd583081..5ae255304f89eebb7a97fe2bfeac7ebc82b9c765 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h @@ -16,7 +16,7 @@ #include "TriangularSolve.h" #include <TNL/Exceptions/CudaSupportMissing.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> namespace TNL { namespace Solvers { @@ -282,7 +282,7 @@ allocate_LU() U->setDimensions( N, N ); // extract raw pointer - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); const CSR* kernel_A = &A.template getData< DeviceType >(); // copy row lengths @@ -308,7 +308,7 @@ allocate_LU() L_rowLengths_view[ i ] = L_entries; U_rowLengths_view[ i ] = U_entries; }; - ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths ); + Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths ); L->setCompressedRowLengths( L_rowLengths ); U->setCompressedRowLengths( U_rowLengths ); #else @@ -329,7 +329,7 @@ copy_triangular_factors() const int N = A->getRows(); // extract raw pointers - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); CSR* kernel_L = &L.template modifyData< DeviceType >(); CSR* kernel_U = &U.template modifyData< DeviceType >(); const CSR* kernel_A = &A.template getData< DeviceType >(); @@ -349,7 +349,7 @@ copy_triangular_factors() break; } }; - ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_values ); + Algorithms::ParallelFor< DeviceType >::exec( 
(IndexType) 0, N, kernel_copy_values ); #else throw std::runtime_error("The program was not compiled with the CUSPARSE library. Pass -DHAVE_CUSPARSE -lcusparse to the compiler."); #endif diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index 8f4c27d7abd8d65566916b7ac79f34d269bc84e1..cce3dc5c4bde030dc33c4762623124e1d3f65367 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -33,11 +33,6 @@ class ILUT : public ILUT_impl< Matrix, typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType > { public: - String getType() const - { - return String( "ILUT" ); - } - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ) { @@ -111,29 +106,6 @@ public: } }; -template< typename Matrix, typename Real, typename Index > -class ILUT_impl< Matrix, Real, Devices::MIC, Index > -: public Preconditioner< Matrix > -{ -public: - using RealType = Real; - using DeviceType = Devices::MIC; - using IndexType = Index; - using typename Preconditioner< Matrix >::VectorViewType; - using typename Preconditioner< Matrix >::ConstVectorViewType; - using typename Preconditioner< Matrix >::MatrixPointer; - - virtual void update( const MatrixPointer& matrixPointer ) override - { - throw std::runtime_error("Not Iplemented yet for MIC"); - } - - virtual void solve( ConstVectorViewType b, VectorViewType x ) const override - { - throw std::runtime_error("Not Iplemented yet for MIC"); - } -}; - } // namespace Preconditioners } // namespace Linear } // namespace Solvers diff --git a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h index 2e70be2b1b044ee45585ef689d443db1d1e8a8c1..67a62e74f1f2fc53203be81897163eba84cfc7ea 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h +++ b/src/TNL/Solvers/Linear/Preconditioners/Preconditioner.h @@ -55,11 +55,6 @@ 
public: throw std::logic_error("The solve() method of a dummy preconditioner should not be called."); } - String getType() const - { - return String( "Preconditioner" ); - } - virtual ~Preconditioner() {} }; diff --git a/src/TNL/Solvers/Linear/SOR.h b/src/TNL/Solvers/Linear/SOR.h index 7e94634cdf178abf9df838e533529c44ea5fb2cc..0d9aae433324aa3fd1346237c851fc5f6a192225 100644 --- a/src/TNL/Solvers/Linear/SOR.h +++ b/src/TNL/Solvers/Linear/SOR.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/Linear/SOR_impl.h b/src/TNL/Solvers/Linear/SOR_impl.h index 648ae8d419643eb170788c701da161040fa7220d..4a7d4fb9d3784e7079505405ad750def3e647630 100644 --- a/src/TNL/Solvers/Linear/SOR_impl.h +++ b/src/TNL/Solvers/Linear/SOR_impl.h @@ -17,14 +17,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String SOR< Matrix > :: getType() const -{ - return String( "SOR< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > void SOR< Matrix >:: diff --git a/src/TNL/Solvers/Linear/TFQMR.h b/src/TNL/Solvers/Linear/TFQMR.h index 73d0894aada0cc311146d6fff686fefd2e934e3e..2a94f44e7c94413edbe54203d87227ded8cc6983 100644 --- a/src/TNL/Solvers/Linear/TFQMR.h +++ b/src/TNL/Solvers/Linear/TFQMR.h @@ -28,8 +28,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; protected: diff --git a/src/TNL/Solvers/Linear/TFQMR_impl.h b/src/TNL/Solvers/Linear/TFQMR_impl.h index 590aa35af0b9c9cf8cf73a0c66e02a265a1bacdf..0ea03e83c5fc06b41ef0bdb1e11e9fa3a6a4d164 100644 --- a/src/TNL/Solvers/Linear/TFQMR_impl.h 
+++ b/src/TNL/Solvers/Linear/TFQMR_impl.h @@ -18,14 +18,6 @@ namespace TNL { namespace Solvers { namespace Linear { -template< typename Matrix > -String TFQMR< Matrix > :: getType() const -{ - return String( "TFQMR< " ) + - this->matrix -> getType() + ", " + - this->preconditioner -> getType() + " >"; -} - template< typename Matrix > bool TFQMR< Matrix >::solve( ConstVectorViewType b, VectorViewType x ) { diff --git a/src/TNL/Solvers/Linear/UmfpackWrapper.h b/src/TNL/Solvers/Linear/UmfpackWrapper.h index 1d4e67ea2771e6d389eb4e84ca199423d927fd16..0e2e5d7ac78a9d6264f1b4448beaf5728e7dce7f 100644 --- a/src/TNL/Solvers/Linear/UmfpackWrapper.h +++ b/src/TNL/Solvers/Linear/UmfpackWrapper.h @@ -81,8 +81,6 @@ public: using VectorViewType = typename Base::VectorViewType; using ConstVectorViewType = typename Base::ConstVectorViewType; - String getType() const; - bool solve( ConstVectorViewType b, VectorViewType x ) override; }; diff --git a/src/TNL/Solvers/ODE/Euler.h b/src/TNL/Solvers/ODE/Euler.h index 2ba128073ec65aba4f8e1bc5c7f6cad661f67303..1fd6ab3c11a8a786f9a713e11082d69ae9912f36 100644 --- a/src/TNL/Solvers/ODE/Euler.h +++ b/src/TNL/Solvers/ODE/Euler.h @@ -10,12 +10,10 @@ #pragma once -#include <math.h> #include <TNL/Config/ConfigDescription.h> #include <TNL/Solvers/ODE/ExplicitSolver.h> #include <TNL/Solvers/DummyProblem.h> #include <TNL/Config/ParameterContainer.h> -#include <TNL/Timer.h> namespace TNL { namespace Solvers { @@ -37,8 +35,6 @@ class Euler : public ExplicitSolver< Problem, SolverMonitor > Euler(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/ODE/Euler.hpp b/src/TNL/Solvers/ODE/Euler.hpp index 12da6439bd15d4fdbe1e0a088910c940cfc90aa2..9dc6b6570329e2684e359c109dc6eb969ee0b9e7 100644 --- a/src/TNL/Solvers/ODE/Euler.hpp +++ b/src/TNL/Solvers/ODE/Euler.hpp @@ -10,9 +10,7 @@ #pragma once -#include <TNL/Devices/MIC.h> -#include 
<TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/NoDistrCommunicator.h> +#include <TNL/Solvers/ODE/Euler.h> namespace TNL { namespace Solvers { @@ -33,14 +31,6 @@ Euler< Problem, SolverMonitor > :: Euler() { }; -template< typename Problem, typename SolverMonitor > -String Euler< Problem, SolverMonitor > :: getType() -{ - return String( "Euler< " ) + - Problem :: getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Euler< Problem, SolverMonitor > :: configSetup( Config::ConfigDescription& config, const String& prefix ) @@ -77,7 +67,6 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u ) /**** * First setup the supporting meshes k1...k5 and k_tmp. */ - //timer.start(); _k1->setLike( *_u ); auto k1 = _k1->getView(); auto u = _u->getView(); @@ -104,9 +93,7 @@ bool Euler< Problem, SolverMonitor > :: solve( DofVectorPointer& _u ) /**** * Compute the RHS */ - //timer.stop(); this->problem->getExplicitUpdate( time, currentTau, _u, _k1 ); - //timer.start(); RealType lastResidue = this->getResidue(); RealType maxResidue( 0.0 ); diff --git a/src/TNL/Solvers/ODE/Merson.h b/src/TNL/Solvers/ODE/Merson.h index 3ac978178cfe7f42050199f5cf9e8d722c504015..99ffc2409ed67540be41a48c9ab030fd3756d705 100644 --- a/src/TNL/Solvers/ODE/Merson.h +++ b/src/TNL/Solvers/ODE/Merson.h @@ -35,8 +35,6 @@ class Merson : public ExplicitSolver< Problem, SolverMonitor > Merson(); - static String getType(); - static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h index 3c88576e9e583c4c161782f630630573bd23be3a..4c7b21bc93c5bcfb5adff76e89d876778f1049aa 100644 --- a/src/TNL/Solvers/ODE/Merson_impl.h +++ b/src/TNL/Solvers/ODE/Merson_impl.h @@ -40,14 +40,6 @@ Merson< Problem, SolverMonitor >::Merson() } }; -template< typename Problem, typename SolverMonitor > -String Merson< Problem, SolverMonitor >::getType() -{ - 
return String( "Merson< " ) + - Problem::getType() + - String( " >" ); -}; - template< typename Problem, typename SolverMonitor > void Merson< Problem, SolverMonitor >::configSetup( Config::ConfigDescription& config, const String& prefix ) diff --git a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h index a28a64cf5567eaee40f5d4efca3fd24af3dd2819..d1b871c25c0a998b68a770d4c0629ad76d20dfb5 100644 --- a/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h +++ b/src/TNL/Solvers/PDE/BoundaryConditionsSetter.h @@ -11,7 +11,7 @@ #pragma once -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CudaCallable.h> #include <TNL/Functions/FunctionAdapter.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Meshes/Traverser.h> diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h index 8a5f0db1e8f0effe43951a610e67bcd8d47d6548..d4f6992b37e27cccecf38a193be8435ea2d9fe96 100644 --- a/src/TNL/Solvers/PDE/ExplicitTimeStepper.h +++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper.h @@ -42,8 +42,6 @@ class ExplicitTimeStepper static_assert( ProblemType::isTimeDependent(), "The problem is not time dependent." 
); - static String getType(); - ExplicitTimeStepper(); static void configSetup( Config::ConfigDescription& config, diff --git a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h index 4024ff32671bef44ba72405e578c5c17067ca7e9..fa2d1f8066ed22ebc2626e93ff88deb1e6790177 100644 --- a/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h +++ b/src/TNL/Solvers/PDE/ExplicitTimeStepper_impl.h @@ -14,21 +14,8 @@ namespace TNL { namespace Solvers { -namespace PDE { +namespace PDE { -template< typename Problem, - template < typename OdeProblem, typename SolverMonitor > class OdeSolver > -String -ExplicitTimeStepper< Problem, OdeSolver >:: -getType() -{ - return String( "ExplicitTimeStepper< " ) + - Problem::getType() + ", " + - OdeSolverType::getType() + ", " + - String( " >" ); -}; - - template< typename Problem, template < typename OdeProblem, typename SolverMonitor > class OdeSolver > ExplicitTimeStepper< Problem, OdeSolver >:: @@ -37,7 +24,7 @@ ExplicitTimeStepper() timeStep( 0 ), allIterations( 0 ) { -}; +} template< typename Problem, template < typename OdeProblem, typename SolverMonitor > class OdeSolver > diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h index 701c5eb730b99e2487e56bd5e56a9ffdec0b916d..e5673d5c1ed45ea9a28f8615cd4f099284bb8875 100644 --- a/src/TNL/Solvers/SolverConfig_impl.h +++ b/src/TNL/Solvers/SolverConfig_impl.h @@ -67,12 +67,6 @@ bool SolverConfig< ConfigTag, ProblemConfig >::configSetup( Config::ConfigDescri if( ConfigTagDevice< ConfigTag, Devices::Cuda >::enabled ) config.addEntryEnum( "cuda" ); #endif - -#ifdef HAVE_MIC - if( ConfigTagDevice< ConfigTag, Devices::MIC >::enabled ) - config.addEntryEnum( "mic" ); -#endif - /**** * Setup index type. 
diff --git a/src/TNL/Solvers/SolverInitiator_impl.h b/src/TNL/Solvers/SolverInitiator_impl.h index c6bc5ca7f494abd8922f1a0fcb45b4814277094f..e54a8fe308c4478a7242a24f8032473be8431d1e 100644 --- a/src/TNL/Solvers/SolverInitiator_impl.h +++ b/src/TNL/Solvers/SolverInitiator_impl.h @@ -12,7 +12,6 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> -#include <TNL/Devices/MIC.h> #include <TNL/Config/ParameterContainer.h> #include <TNL/Meshes/TypeResolver/TypeResolver.h> #include <TNL/Solvers/BuildConfigTags.h> @@ -92,8 +91,6 @@ class SolverInitiatorRealResolver< ProblemSetter, Real, ConfigTag, true > return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Host, ConfigTag >::run( parameters ); if( device == "cuda" ) return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::Cuda, ConfigTag >::run( parameters ); - if(device == "mic") - return SolverInitiatorDeviceResolver< ProblemSetter, Real, Devices::MIC, ConfigTag >::run( parameters ); std::cerr << "The device '" << device << "' is not defined. 
" << std::endl; return false; } diff --git a/src/TNL/Solvers/SolverStarter_impl.h b/src/TNL/Solvers/SolverStarter_impl.h index e52d03a4f7377c5fc80ade50d7981a3a07a48f08..8b323d5d745e08216bc3477adc03d3495bce8dce 100644 --- a/src/TNL/Solvers/SolverStarter_impl.h +++ b/src/TNL/Solvers/SolverStarter_impl.h @@ -406,7 +406,7 @@ bool SolverStarter< ConfigTag > :: writeEpilog( std::ostream& str, const Solver& if( std::is_same< typename Solver::DeviceType, TNL::Devices::Cuda >::value ) { logger.writeParameter< const char* >( "GPU synchronization time:", "" ); - TNL::Devices::Cuda::getSmartPointersSynchronizationTimer().writeLog( logger, 1 ); + Pointers::getSmartPointersSynchronizationTimer< Devices::Cuda >().writeLog( logger, 1 ); } logger.writeParameter< const char* >( "I/O time:", "" ); this->ioTimer.writeLog( logger, 1 ); diff --git a/src/TNL/String.h b/src/TNL/String.h index a04802216c2dd14a6495b30146a49066f708820e..f35abc377177b6b061b68074714ce3e143b55d22 100644 --- a/src/TNL/String.h +++ b/src/TNL/String.h @@ -21,8 +21,6 @@ namespace TNL { -class String; - /** * \brief Class for managing strings. * @@ -39,8 +37,6 @@ class String; * * \ref operator+ * - * \ref operator<< - * * \ref mpiSend * * \ref mpiReceive @@ -101,11 +97,6 @@ class String */ using std::string::operator=; - /** - * \brief Returns type of string: \c "String". - */ - static String getType(); - /** * \brief Returns the number of characters in given string. Equivalent to \ref getSize. */ @@ -368,11 +359,6 @@ String operator+( const char* string1, const String& string2 ); */ String operator+( const std::string& string1, const String& string2 ); -/** - * \brief Writes the string \e str to given \e stream - */ -std::ostream& operator<<( std::ostream& stream, const String& str ); - /** * \brief Converts \e value of type \e T to a String. 
* diff --git a/src/TNL/String.hpp b/src/TNL/String.hpp index 4cdeee7aceb62e93b27a91655938df494c9e3666..3c38fe6b0ca24a58242461788b2b417a94ab6a1e 100644 --- a/src/TNL/String.hpp +++ b/src/TNL/String.hpp @@ -19,11 +19,6 @@ namespace TNL { -inline String String::getType() -{ - return String( "String" ); -} - inline int String::getLength() const { return getSize(); @@ -243,12 +238,6 @@ inline String operator+( const std::string& string1, const String& string2 ) return String( string1 ) + string2; } -inline std::ostream& operator<<( std::ostream& stream, const String& str ) -{ - stream << str.getString(); - return stream; -} - #ifdef HAVE_MPI inline void mpiSend( const String& str, int target, int tag, MPI_Comm mpi_comm ) { diff --git a/src/TNL/Devices/SystemInfo.h b/src/TNL/SystemInfo.h similarity index 95% rename from src/TNL/Devices/SystemInfo.h rename to src/TNL/SystemInfo.h index f62321d6f819303ec4c12d174a71305f26792ac1..e64418a7cc9a056a1748006aa8388ff8ebd28421 100644 --- a/src/TNL/Devices/SystemInfo.h +++ b/src/TNL/SystemInfo.h @@ -15,7 +15,6 @@ #include <TNL/String.h> namespace TNL { -namespace Devices { struct CacheSizes { int L1instruction = 0; @@ -68,7 +67,6 @@ protected: } }; -} // namespace Devices } // namespace TNL -#include <TNL/Devices/SystemInfo_impl.h> +#include <TNL/SystemInfo.hpp> diff --git a/src/TNL/Devices/SystemInfo_impl.h b/src/TNL/SystemInfo.hpp similarity index 98% rename from src/TNL/Devices/SystemInfo_impl.h rename to src/TNL/SystemInfo.hpp index 0bc42601191aad2fd8a09fa8080a0295ebe6c075..b46234418df9df58daf9ceb836ddd606d8e170a2 100644 --- a/src/TNL/Devices/SystemInfo_impl.h +++ b/src/TNL/SystemInfo.hpp @@ -18,10 +18,9 @@ #include <sys/utsname.h> #include <sys/stat.h> -#include <TNL/Devices/SystemInfo.h> +#include <TNL/SystemInfo.h> namespace TNL { -namespace Devices { inline String SystemInfo::getHostname( void ) @@ -215,5 +214,4 @@ SystemInfo::parseCPUInfo( void ) return info; } -} // namespace Devices } // namespace TNL diff --git 
a/src/TNL/TypeInfo.h b/src/TNL/TypeInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..61377fbb8593ce9659c75dc355ad8abfe0838333 --- /dev/null +++ b/src/TNL/TypeInfo.h @@ -0,0 +1,107 @@ +/*************************************************************************** + TypeInfo.h - description + ------------------- + begin : Aug 20, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <typeinfo> +#include <string> + +#if defined( __has_include ) + #if __has_include(<cxxabi.h>) + #define TNL_HAS_CXXABI_H + #endif +#elif defined( __GLIBCXX__ ) || defined( __GLIBCPP__ ) + #define TNL_HAS_CXXABI_H +#endif + +#if defined( TNL_HAS_CXXABI_H ) + #include <cxxabi.h> // abi::__cxa_demangle + #include <memory> // std::unique_ptr + #include <cstdlib> // std::free +#endif + +#include <TNL/TypeTraits.h> +#include <TNL/String.h> + +namespace TNL { +namespace __getType_impl { + +inline std::string +demangle( const char* name ) +{ +#if defined( TNL_HAS_CXXABI_H ) + int status = 0; + std::size_t size = 0; + std::unique_ptr<char[], void (*)(void*)> result( + abi::__cxa_demangle( name, NULL, &size, &status ), + std::free + ); + if( result.get() ) + return result.get(); +#endif + return name; +} + +} // namespace __getType_impl + +/** + * \brief Returns a human-readable string representation of given type. + * + * Note that since we use the \ref typeid operator internally, the top-level + * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143 + * for details. + */ +template< typename T > +String getType() +{ + return __getType_impl::demangle( typeid(T).name() ); +} + +/** + * \brief Returns a human-readable string representation of given object's type. 
+ * + * Note that since we use the \ref typeid operator internally, the top-level + * cv-qualifiers are always ignored. See https://stackoverflow.com/a/8889143 + * for details. + */ +template< typename T > +String getType( T&& obj ) +{ + return __getType_impl::demangle( typeid(obj).name() ); +} + +/** + * \brief Returns a string identifying a type for the purpose of serialization. + * + * By default, this function returns the same string as \ref getType. However, + * if a user-defined class has a static \e getSerializationType method, it is + * called instead. This is useful for overriding the default \ref typeid name, + * which may be necessary e.g. for class templates which should have the same + * serialization type for multiple devices. + */ +template< typename T, + std::enable_if_t< ! HasStaticGetSerializationType< T >::value, bool > = true > +String getSerializationType() +{ + return getType< T >(); +} + +/** + * \brief Specialization of \ref getSerializationType for types which provide a + * static \e getSerializationType method to override the default behaviour. + */ +template< typename T, + std::enable_if_t< HasStaticGetSerializationType< T >::value, bool > = true > +String getSerializationType() +{ + return T::getSerializationType(); +} + +} // namespace TNL diff --git a/src/TNL/TypeTraits.h b/src/TNL/TypeTraits.h index d34f7d39fc223dc9a2b351c3885557357741f656..d617f2b42454bce2fad7a5d2ff69b685574723c9 100644 --- a/src/TNL/TypeTraits.h +++ b/src/TNL/TypeTraits.h @@ -76,7 +76,6 @@ public: static constexpr bool value = type::value; }; - /** * \brief Type trait for checking if T has operator[] taking one index argument. */ @@ -183,4 +182,31 @@ struct IsViewType std::is_same< typename T::ViewType, T >::value > {}; +/** + * \brief Type trait for checking if T has a static getSerializationType method. 
+ */ +template< typename T > +class HasStaticGetSerializationType +{ +private: + template< typename U > + static constexpr auto check(U*) + -> typename + std::enable_if_t< + ! std::is_same< + decltype( U::getSerializationType() ), + void + >::value, + std::true_type + >; + + template< typename > + static constexpr std::false_type check(...); + + using type = decltype(check<T>(0)); + +public: + static constexpr bool value = type::value; +}; + } //namespace TNL diff --git a/src/TNL/param-types.h b/src/TNL/param-types.h deleted file mode 100644 index 228b742793243624e4c9c4d611c1e84e2e77c660..0000000000000000000000000000000000000000 --- a/src/TNL/param-types.h +++ /dev/null @@ -1,91 +0,0 @@ -/*************************************************************************** - param-types.h - description - ------------------- - begin : 2009/07/29 - copyright : (C) 2009 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <vector> -#include <type_traits> - -#include <TNL/Experimental/Arithmetics/Real.h> -#include <TNL/String.h> - -namespace TNL { - -namespace __getType_impl { - -template< typename T, - bool isEnum = std::is_enum< T >::value > -struct getTypeHelper -{ - static String get() { return T::getType(); } -}; - -template<> struct getTypeHelper< void, false >{ static String get() { return String( "void" ); }; }; -template<> struct getTypeHelper< bool, false >{ static String get() { return String( "bool" ); }; }; - -template<> struct getTypeHelper< char, false >{ static String get() { return String( "char" ); }; }; -template<> struct getTypeHelper< short int, false >{ static String get() { return String( "short int" ); }; }; -template<> struct getTypeHelper< int, false >{ static String get() { return String( "int" ); }; }; -template<> struct getTypeHelper< long int, false >{ static String get() { return String( 
"long int" ); }; }; - -template<> struct getTypeHelper< unsigned char, false >{ static String get() { return String( "unsigned char" ); }; }; -template<> struct getTypeHelper< unsigned short, false >{ static String get() { return String( "unsigned short" ); }; }; -template<> struct getTypeHelper< unsigned int, false >{ static String get() { return String( "unsigned int" ); }; }; -template<> struct getTypeHelper< unsigned long, false >{ static String get() { return String( "unsigned long" ); }; }; - -template<> struct getTypeHelper< signed char, false >{ static String get() { return String( "signed char" ); }; }; - -template<> struct getTypeHelper< float, false >{ static String get() { return String( "float" ); }; }; -template<> struct getTypeHelper< double, false >{ static String get() { return String( "double" ); }; }; -template<> struct getTypeHelper< long double, false >{ static String get() { return String( "long double" ); }; }; -template<> struct getTypeHelper< tnlFloat, false >{ static String get() { return String( "tnlFloat" ); }; }; -template<> struct getTypeHelper< tnlDouble, false >{ static String get() { return String( "tnlDouble" ); }; }; - -// const specializations -template<> struct getTypeHelper< const void, false >{ static String get() { return String( "const void" ); }; }; -template<> struct getTypeHelper< const bool, false >{ static String get() { return String( "const bool" ); }; }; - -template<> struct getTypeHelper< const char, false >{ static String get() { return String( "const char" ); }; }; -template<> struct getTypeHelper< const short int, false >{ static String get() { return String( "const short int" ); }; }; -template<> struct getTypeHelper< const int, false >{ static String get() { return String( "const int" ); }; }; -template<> struct getTypeHelper< const long int, false >{ static String get() { return String( "const long int" ); }; }; - -template<> struct getTypeHelper< const unsigned char, false >{ static String get() { return 
String( "const unsigned char" ); }; }; -template<> struct getTypeHelper< const unsigned short, false >{ static String get() { return String( "const unsigned short" ); }; }; -template<> struct getTypeHelper< const unsigned int, false >{ static String get() { return String( "const unsigned int" ); }; }; -template<> struct getTypeHelper< const unsigned long, false >{ static String get() { return String( "const unsigned long" ); }; }; - -template<> struct getTypeHelper< const signed char, false >{ static String get() { return String( "const signed char" ); }; }; - -template<> struct getTypeHelper< const float, false >{ static String get() { return String( "const float" ); }; }; -template<> struct getTypeHelper< const double, false >{ static String get() { return String( "const double" ); }; }; -template<> struct getTypeHelper< const long double, false >{ static String get() { return String( "const long double" ); }; }; -template<> struct getTypeHelper< const tnlFloat, false >{ static String get() { return String( "const tnlFloat" ); }; }; -template<> struct getTypeHelper< const tnlDouble, false >{ static String get() { return String( "const tnlDouble" ); }; }; - -template< typename T > -struct getTypeHelper< T, true > -{ - static String get() { return getTypeHelper< typename std::underlying_type< T >::type, false >::get(); }; -}; - -// wrappers for STL containers -template< typename T > -struct getTypeHelper< std::vector< T >, false > -{ - static String get() { return String( "std::vector< " ) + getTypeHelper< T >::get() + " >"; } -}; - -} // namespace __getType_impl - -template< typename T > -String getType() { return __getType_impl::getTypeHelper< T >::get(); } - -} // namespace TNL diff --git a/src/Tools/tnl-dicom-reader.cpp b/src/Tools/tnl-dicom-reader.cpp index f6931e5f47f21b7ce404e6bc90c384b2aeb749d7..c0f770e497b95ba2758cb852a36a3c3ccf562069 100644 --- a/src/Tools/tnl-dicom-reader.cpp +++ b/src/Tools/tnl-dicom-reader.cpp @@ -37,7 +37,7 @@ bool processDicomFiles( 
const Config::ParameterContainer& parameters ) bool processDicomSeries( const Config::ParameterContainer& parameters ) { - const Containers::List< String >& dicomSeriesNames = parameters.getParameter< Containers::List< String > >( "dicom-series" ); + const std::vector< String >& dicomSeriesNames = parameters.getParameter< std::vector< String > >( "dicom-series" ); String meshFile = parameters.getParameter< String >( "mesh-file" ); bool verbose = parameters.getParameter< bool >( "verbose" ); @@ -45,7 +45,7 @@ bool processDicomSeries( const Config::ParameterContainer& parameters ) GridType grid; Containers::Vector< double, Devices::Host, int > vector; Images::RegionOfInterest< int > roi; - for( int i = 0; i < dicomSeriesNames.getSize(); i++ ) + for( std::size_t i = 0; i < dicomSeriesNames.size(); i++ ) { const String& seriesName = dicomSeriesNames[ i ]; std::cout << "Reading a file " << seriesName << std::endl; diff --git a/src/Tools/tnl-lattice-init.h b/src/Tools/tnl-lattice-init.h index 203054f58e4afc34323e2fed4ddec25f511cdc00..71a09636c5c5bec19c10e71ce5869bd2a790f9a2 100644 --- a/src/Tools/tnl-lattice-init.h +++ b/src/Tools/tnl-lattice-init.h @@ -246,9 +246,9 @@ bool resolveProfileReal( const Config::ParameterContainer& parameters ) std::cerr << "MeshFunction is required in profile file " << profileFile << "." << std::endl; return false; } - if( parsedMeshFunctionType[ 1 ] != ProfileMesh::getType() ) + if( parsedMeshFunctionType[ 1 ] != getType< ProfileMesh >() ) { - std::cerr << "The mesh function in the profile file must be defined on " << ProfileMesh::getType() + std::cerr << "The mesh function in the profile file must be defined on " << getType< ProfileMesh >() << " but it is defined on " << parsedMeshFunctionType[ 1 ] << "." 
<< std::endl; return false; } diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in index e67c5e007a02f45bce36eb079ae529c27f7dce0f..89146c200933c51e79eac204d4862dd6c0a073c5 100644 --- a/src/Tools/tnl-quickstart/operator-grid-specialization.h.in +++ b/src/Tools/tnl-quickstart/operator-grid-specialization.h.in @@ -14,8 +14,6 @@ class {operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, Mesh typedef TNL::Functions::MeshFunction< MeshType > MeshFunctionType; enum {{ Dimension = MeshType::getMeshDimension() }}; - static TNL::String getType(); - template< typename MeshFunction, typename MeshEntity > __cuda_callable__ Real operator()( const MeshFunction& u, diff --git a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in index da4da6d635d10d1681d689972d5e93695e47b4dd..ed00005bcff2c159df519fc9493def8b18a238e7 100644 --- a/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in +++ b/src/Tools/tnl-quickstart/operator-grid-specialization_impl.h.in @@ -1,21 +1,6 @@ /**** * {meshDimension}D problem */ -template< typename MeshReal, - typename Device, - typename MeshIndex, - typename Real, - typename Index > -TNL::String -{operatorName}< TNL::Meshes::Grid< {meshDimension}, MeshReal, Device, MeshIndex >, Real, Index >:: -getType() -{{ - return TNL::String( "{operatorName}< " ) + - MeshType::getType() + ", " + - TNL::getType< Real >() + ", " + - TNL::getType< Index >() + " >"; -}} - template< typename MeshReal, typename Device, typename MeshIndex, diff --git a/src/Tools/tnl-quickstart/problem.h.in b/src/Tools/tnl-quickstart/problem.h.in index 9006f7cf7c5f1e39e5b59333fe888e5d56b479d5..d72120c1fe54dc34d06f7c49832dee3214772ff7 100644 --- a/src/Tools/tnl-quickstart/problem.h.in +++ b/src/Tools/tnl-quickstart/problem.h.in @@ -38,8 +38,6 @@ class {problemBaseName}Problem: using CommunicatorType = 
Communicator; - static TNL::String getTypeStatic(); - TNL::String getPrologHeader() const; void writeProlog( TNL::Logger& logger, diff --git a/src/Tools/tnl-quickstart/problem_impl.h.in b/src/Tools/tnl-quickstart/problem_impl.h.in index f196ebcec1922b51539ca2f5794ba8b8324be368..3e72e4db125e0b9f6f17628f07539c218ff10907 100644 --- a/src/Tools/tnl-quickstart/problem_impl.h.in +++ b/src/Tools/tnl-quickstart/problem_impl.h.in @@ -7,18 +7,6 @@ #include <TNL/Solvers/PDE/BoundaryConditionsSetter.h> #include <TNL/Solvers/PDE/BackwardTimeDiscretisation.h> -template< typename Mesh, - typename Communicator, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -TNL::String -{problemBaseName}Problem< Mesh, Communicator, BoundaryCondition, RightHandSide, DifferentialOperator >:: -getTypeStatic() -{{ - return TNL::String( "{problemBaseName}Problem< " ) + Mesh :: getTypeStatic() + " >"; -}} - template< typename Mesh, typename Communicator, typename BoundaryCondition, diff --git a/src/Tools/tnl-view.h b/src/Tools/tnl-view.h index cd7cd93b9a2f0729cd8e9fd1e3628e6a5e58f6dd..7e7b82bbfc8165245fad06e1e97dad072e621442 100644 --- a/src/Tools/tnl-view.h +++ b/src/Tools/tnl-view.h @@ -52,7 +52,7 @@ bool writeMeshFunction( const typename MeshFunction::MeshPointer& meshPointer, { MeshFunction function( meshPointer ); - std::cout << "Mesh function: " << function.getType() << std::endl; + std::cout << "Mesh function: " << getType( function ) << std::endl; try { function.load( inputFileName ); @@ -84,7 +84,7 @@ bool writeVectorField( const typename VectorField::FunctionType::MeshPointer& me { VectorField field( meshPointer ); - std::cout << "VectorField: " << field.getType() << std::endl; + std::cout << "VectorField: " << getType( field ) << std::endl; try { field.load( inputFileName ); diff --git a/src/UnitTests/Algorithms/CMakeLists.txt b/src/UnitTests/Algorithms/CMakeLists.txt new file mode 100644 index 
0000000000000000000000000000000000000000..6870bc84e402f924e48f24f7e95fe8d52dac9434 --- /dev/null +++ b/src/UnitTests/Algorithms/CMakeLists.txt @@ -0,0 +1,29 @@ +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE( MemoryOperationsTest MemoryOperationsTest.cpp ) + TARGET_COMPILE_OPTIONS( MemoryOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MemoryOperationsTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) + TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) + TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) +ENDIF( BUILD_CUDA ) + + +ADD_TEST( MemoryOperationsTest ${EXECUTABLE_OUTPUT_PATH}/MemoryOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Algorithms/MemoryOperationsTest.cpp b/src/UnitTests/Algorithms/MemoryOperationsTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..976447eef2447bf0fee4f7580f106ae561aa9168 --- /dev/null +++ b/src/UnitTests/Algorithms/MemoryOperationsTest.cpp @@ -0,0 +1 @@ +#include "MemoryOperationsTest.h" diff --git 
a/src/UnitTests/Algorithms/MemoryOperationsTest.cu b/src/UnitTests/Algorithms/MemoryOperationsTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..976447eef2447bf0fee4f7580f106ae561aa9168 --- /dev/null +++ b/src/UnitTests/Algorithms/MemoryOperationsTest.cu @@ -0,0 +1 @@ +#include "MemoryOperationsTest.h" diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Algorithms/MemoryOperationsTest.h similarity index 57% rename from src/UnitTests/Containers/ArrayOperationsTest.h rename to src/UnitTests/Algorithms/MemoryOperationsTest.h index 4a48261be0401eed2231a007d7e68dfed711cb2b..ebfb01f1bf62144d2ff950c4d3265cc7474dab3b 100644 --- a/src/UnitTests/Containers/ArrayOperationsTest.h +++ b/src/UnitTests/Algorithms/MemoryOperationsTest.h @@ -1,5 +1,5 @@ /*************************************************************************** - ArrayOperationsTest.h - description + MemoryOperationsTest.h - description ------------------- begin : Jul 15, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -13,19 +13,19 @@ #ifdef HAVE_GTEST #include <TNL/Allocators/Host.h> #include <TNL/Allocators/Cuda.h> -#include <TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Algorithms/MemoryOperations.h> +#include <TNL/Algorithms/MultiDeviceMemoryOperations.h> #include "gtest/gtest.h" using namespace TNL; -using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; constexpr int ARRAY_TEST_SIZE = 5000; // test fixture for typed tests template< typename Value > -class ArrayOperationsTest : public ::testing::Test +class MemoryOperationsTest : public ::testing::Test { protected: using ValueType = Value; @@ -34,9 +34,9 @@ protected: // types for which ArrayTest is instantiated using ValueTypes = ::testing::Types< short int, int, long, float, double >; -TYPED_TEST_SUITE( ArrayOperationsTest, ValueTypes ); +TYPED_TEST_SUITE( MemoryOperationsTest, ValueTypes ); -TYPED_TEST( ArrayOperationsTest, 
allocateMemory_host ) +TYPED_TEST( MemoryOperationsTest, allocateMemory_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -48,7 +48,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_host ) allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, setElement_host ) +TYPED_TEST( MemoryOperationsTest, setElement_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -56,27 +56,27 @@ TYPED_TEST( ArrayOperationsTest, setElement_host ) Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { - ArrayOperations< Devices::Host >::setElement( data + i, (ValueType) i ); + MemoryOperations< Devices::Host >::setElement( data + i, (ValueType) i ); EXPECT_EQ( data[ i ], i ); - EXPECT_EQ( ArrayOperations< Devices::Host >::getElement( data + i ), i ); + EXPECT_EQ( MemoryOperations< Devices::Host >::getElement( data + i ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, set_host ) +TYPED_TEST( MemoryOperationsTest, set_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; Allocator allocator; ValueType* data = allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data, (ValueType) 13, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data[ i ], 13 ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copy_host ) +TYPED_TEST( MemoryOperationsTest, copy_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -84,15 +84,15 @@ TYPED_TEST( ArrayOperationsTest, copy_host ) Allocator allocator; ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE ); ValueType* data2 
= allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data1, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::copy< ValueType, ValueType >( data2, data1, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data1[ i ], data2[ i ]); allocator.deallocate( data1, ARRAY_TEST_SIZE ); allocator.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copyWithConversion_host ) +TYPED_TEST( MemoryOperationsTest, copyWithConversion_host ) { using Allocator1 = Allocators::Host< int >; using Allocator2 = Allocators::Host< float >; @@ -101,15 +101,15 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversion_host ) Allocator2 allocator2; int* data1 = allocator1.allocate( ARRAY_TEST_SIZE ); float* data2 = allocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data1, 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::copy< float, int >( data2, data1, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( data1[ i ], data2[ i ] ); allocator1.deallocate( data1, ARRAY_TEST_SIZE ); allocator2.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compare_host ) +TYPED_TEST( MemoryOperationsTest, compare_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -117,16 +117,16 @@ TYPED_TEST( ArrayOperationsTest, compare_host ) Allocator allocator; ValueType* data1 = allocator.allocate( ARRAY_TEST_SIZE ); ValueType* data2 = allocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE 
); - ArrayOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE ); - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); - ArrayOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data1, (ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( data2, (ValueType) 0, ARRAY_TEST_SIZE ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data2, (ValueType) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( data1, data2, ARRAY_TEST_SIZE ) ) ); allocator.deallocate( data1, ARRAY_TEST_SIZE ); allocator.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compareWithConversion_host ) +TYPED_TEST( MemoryOperationsTest, compareWithConversion_host ) { using Allocator1 = Allocators::Host< int >; using Allocator2 = Allocators::Host< float >; @@ -135,16 +135,16 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversion_host ) Allocator2 allocator2; int* data1 = allocator1.allocate( ARRAY_TEST_SIZE ); float* data2 = allocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( data2, (float) 0.0, ARRAY_TEST_SIZE ); - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); - ArrayOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data1, 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host 
>::set( data2, (float) 0.0, ARRAY_TEST_SIZE ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); + MemoryOperations< Devices::Host >::set( data2, (float) 7.0, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< int, float >( data1, data2, ARRAY_TEST_SIZE ) ) ); allocator1.deallocate( data1, ARRAY_TEST_SIZE ); allocator2.deallocate( data2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsValue_host ) +TYPED_TEST( MemoryOperationsTest, containsValue_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -155,14 +155,14 @@ TYPED_TEST( ArrayOperationsTest, containsValue_host ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) data[ i ] = i % 10; for( int i = 0; i < 10; i++ ) - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host ) +TYPED_TEST( MemoryOperationsTest, containsOnlyValue_host ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Host< ValueType >; @@ -173,18 +173,18 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_host ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) data[ i ] = i % 10; for( int i = 0; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; 
i < ARRAY_TEST_SIZE; i++ ) data[ i ] = 10; - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::containsOnlyValue( data, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); allocator.deallocate( data, ARRAY_TEST_SIZE ); } #ifdef HAVE_CUDA -TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda ) +TYPED_TEST( MemoryOperationsTest, allocateMemory_cuda ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Cuda< ValueType >; @@ -198,7 +198,7 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda ) ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } -TYPED_TEST( ArrayOperationsTest, setElement_cuda ) +TYPED_TEST( MemoryOperationsTest, setElement_cuda ) { using ValueType = typename TestFixture::ValueType; using Allocator = Allocators::Cuda< ValueType >; @@ -208,21 +208,21 @@ TYPED_TEST( ArrayOperationsTest, setElement_cuda ) ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) - ArrayOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i ); + MemoryOperations< Devices::Cuda >::setElement( &data[ i ], (ValueType) i ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) { ValueType d; ASSERT_EQ( cudaMemcpy( &d, &data[ i ], sizeof( ValueType ), cudaMemcpyDeviceToHost ), cudaSuccess ); EXPECT_EQ( d, i ); - EXPECT_EQ( ArrayOperations< Devices::Cuda >::getElement( &data[ i ] ), i ); + EXPECT_EQ( MemoryOperations< Devices::Cuda >::getElement( &data[ i ] ), i ); } allocator.deallocate( data, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } -TYPED_TEST( ArrayOperationsTest, set_cuda ) +TYPED_TEST( MemoryOperationsTest, set_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -232,10 +232,10 @@ TYPED_TEST( ArrayOperationsTest, set_cuda ) CudaAllocator cudaAllocator; ValueType* hostData = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* 
deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( hostData, (ValueType) 0, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 13, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) EXPECT_EQ( hostData[ i ], 13 ); @@ -243,7 +243,7 @@ TYPED_TEST( ArrayOperationsTest, set_cuda ) cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copy_cuda ) +TYPED_TEST( MemoryOperationsTest, copy_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -255,18 +255,18 @@ TYPED_TEST( ArrayOperationsTest, copy_cuda ) ValueType* hostData2 = hostAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); + MemoryOperations< 
Devices::Host >::set( hostData, (ValueType) 13, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::copy< ValueType, ValueType >( deviceData2, deviceData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< ValueType, ValueType >( hostData2, deviceData2, ARRAY_TEST_SIZE ); + EXPECT_TRUE( ( MemoryOperations< Devices::Host >::compare< ValueType, ValueType >( hostData, hostData2, ARRAY_TEST_SIZE) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); hostAllocator.deallocate( hostData2, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda ) +TYPED_TEST( MemoryOperationsTest, copyWithConversions_cuda ) { using HostAllocator1 = Allocators::Host< int >; using HostAllocator2 = Allocators::Host< double >; @@ -281,10 +281,10 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda ) double* hostData2 = hostAllocator2.allocate( ARRAY_TEST_SIZE ); long* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE ); float* deviceData2 = cudaAllocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Host >::set( hostData, 13, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy< long, int >( deviceData, hostData, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::copy< float, long >( deviceData2, deviceData, ARRAY_TEST_SIZE ); + 
MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::copy< double, float >( hostData2, deviceData2, ARRAY_TEST_SIZE ); for( int i = 0; i < ARRAY_TEST_SIZE; i ++ ) EXPECT_EQ( hostData[ i ], hostData2[ i ] ); hostAllocator1.deallocate( hostData, ARRAY_TEST_SIZE ); @@ -293,7 +293,7 @@ TYPED_TEST( ArrayOperationsTest, copyWithConversions_cuda ) cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compare_cuda ) +TYPED_TEST( MemoryOperationsTest, compare_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -305,25 +305,25 @@ TYPED_TEST( ArrayOperationsTest, compare_cuda ) ValueType* deviceData = cudaAllocator.allocate( ARRAY_TEST_SIZE ); ValueType* deviceData2 = cudaAllocator.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); - EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Host >::set( hostData, (ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 8, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 9, ARRAY_TEST_SIZE ); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host 
>::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); - ArrayOperations< Devices::Cuda >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Cuda >::set( deviceData, (ValueType) 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (ValueType) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< ValueType, ValueType >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< ValueType, ValueType >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< ValueType, ValueType >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda ) +TYPED_TEST( MemoryOperationsTest, compareWithConversions_cuda ) { using HostAllocator = Allocators::Host< int >; using CudaAllocator1 = Allocators::Cuda< float >; @@ -336,25 +336,25 @@ TYPED_TEST( ArrayOperationsTest, compareWithConversions_cuda ) float* deviceData = cudaAllocator1.allocate( ARRAY_TEST_SIZE ); double* deviceData2 = 
cudaAllocator2.allocate( ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); - EXPECT_FALSE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_FALSE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Host >::set( hostData, 7, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData, (float) 8, ARRAY_TEST_SIZE ); + MemoryOperations< Devices::Cuda >::set( deviceData2, (double) 9, ARRAY_TEST_SIZE ); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_FALSE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); - ArrayOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); - ArrayOperations< Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); - EXPECT_TRUE(( ArrayOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); - EXPECT_TRUE(( ArrayOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); + MemoryOperations< Devices::Cuda >::set( deviceData, (float) 7, ARRAY_TEST_SIZE ); + MemoryOperations< 
Devices::Cuda >::set( deviceData2, (double) 7, ARRAY_TEST_SIZE ); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Host, Devices::Cuda >::compare< int, float >( hostData, deviceData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::compare< float, int >( deviceData, hostData, ARRAY_TEST_SIZE ) )); + EXPECT_TRUE(( MemoryOperations< Devices::Cuda >::compare< float, double >( deviceData, deviceData2, ARRAY_TEST_SIZE ) )); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator1.deallocate( deviceData, ARRAY_TEST_SIZE ); cudaAllocator2.deallocate( deviceData2, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsValue_cuda ) +TYPED_TEST( MemoryOperationsTest, containsValue_cuda ) { using ValueType = typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -367,18 +367,18 @@ TYPED_TEST( ArrayOperationsTest, containsValue_cuda ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 10; i++ ) - EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 10; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); } -TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) +TYPED_TEST( MemoryOperationsTest, containsOnlyValue_cuda ) { using ValueType 
= typename TestFixture::ValueType; using HostAllocator = Allocators::Host< ValueType >; @@ -391,16 +391,16 @@ TYPED_TEST( ArrayOperationsTest, containsOnlyValue_cuda ) for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = i % 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); for( int i = 0; i < 20; i++ ) - EXPECT_FALSE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); + EXPECT_FALSE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) i ) ) ); for( int i = 0; i < ARRAY_TEST_SIZE; i++ ) hostData[ i ] = 10; - ArrayOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); + MultiDeviceMemoryOperations< Devices::Cuda, Devices::Host >::copy( deviceData, hostData, ARRAY_TEST_SIZE ); - EXPECT_TRUE( ( ArrayOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); + EXPECT_TRUE( ( MemoryOperations< Devices::Cuda >::containsOnlyValue( deviceData, ARRAY_TEST_SIZE, (ValueType) 10 ) ) ); hostAllocator.deallocate( hostData, ARRAY_TEST_SIZE ); cudaAllocator.deallocate( deviceData, ARRAY_TEST_SIZE ); diff --git a/src/UnitTests/Containers/MultireductionTest.cpp b/src/UnitTests/Algorithms/MultireductionTest.cpp similarity index 100% rename from src/UnitTests/Containers/MultireductionTest.cpp rename to src/UnitTests/Algorithms/MultireductionTest.cpp diff --git a/src/UnitTests/Containers/MultireductionTest.cu b/src/UnitTests/Algorithms/MultireductionTest.cu similarity index 100% rename from src/UnitTests/Containers/MultireductionTest.cu rename to src/UnitTests/Algorithms/MultireductionTest.cu diff --git a/src/UnitTests/Containers/MultireductionTest.h b/src/UnitTests/Algorithms/MultireductionTest.h similarity index 92% rename from 
src/UnitTests/Containers/MultireductionTest.h rename to src/UnitTests/Algorithms/MultireductionTest.h index 7a321f5836cb2e0b2737c6176eb0b23794c4a501..ec674d935d579ed76b6ba4afc30b1343ca017c51 100644 --- a/src/UnitTests/Containers/MultireductionTest.h +++ b/src/UnitTests/Algorithms/MultireductionTest.h @@ -15,11 +15,11 @@ #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> -#include <TNL/Containers/Algorithms/Multireduction.h> +#include <TNL/Algorithms/Multireduction.h> using namespace TNL; using namespace TNL::Containers; -using namespace TNL::Containers::Algorithms; +using namespace TNL::Algorithms; template< typename View > void setLinearSequence( View& deviceVector ) @@ -50,8 +50,8 @@ class MultireductionTest : public ::testing::Test protected: using DeviceVector = Vector; using DeviceView = VectorView< typename Vector::RealType, typename Vector::DeviceType, typename Vector::IndexType >; - using HostVector = typename DeviceVector::HostType; - using HostView = typename DeviceView::HostType; + using HostVector = typename DeviceVector::template Self< typename DeviceVector::RealType, Devices::Sequential >; + using HostView = typename DeviceView::template Self< typename DeviceView::RealType, Devices::Sequential >; // should be small enough to have fast tests, but larger than minGPUReductionDataSize // and large enough to require multiple CUDA blocks for reduction diff --git a/src/UnitTests/ParallelForTest.cpp b/src/UnitTests/Algorithms/ParallelForTest.cpp similarity index 100% rename from src/UnitTests/ParallelForTest.cpp rename to src/UnitTests/Algorithms/ParallelForTest.cpp diff --git a/src/UnitTests/ParallelForTest.cu b/src/UnitTests/Algorithms/ParallelForTest.cu similarity index 100% rename from src/UnitTests/ParallelForTest.cu rename to src/UnitTests/Algorithms/ParallelForTest.cu diff --git a/src/UnitTests/ParallelForTest.h b/src/UnitTests/Algorithms/ParallelForTest.h similarity index 86% rename from src/UnitTests/ParallelForTest.h rename to 
src/UnitTests/Algorithms/ParallelForTest.h index 95455286e796f536215166c30c8173d52a14e785..aa75fd56093df72bb83b126fde7b3f77e363aa66 100644 --- a/src/UnitTests/ParallelForTest.h +++ b/src/UnitTests/Algorithms/ParallelForTest.h @@ -11,7 +11,7 @@ #include <TNL/Devices/Host.h> #include <TNL/Devices/Cuda.h> #include <TNL/Containers/Array.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> #ifdef HAVE_GTEST #include <gtest/gtest.h> @@ -38,7 +38,7 @@ TEST( ParallelForTest, 1D_host ) { view[i] = i; }; - ParallelFor< Devices::Host >::exec( 0, size, kernel ); + Algorithms::ParallelFor< Devices::Host >::exec( 0, size, kernel ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -65,7 +65,7 @@ TEST( ParallelForTest, 2D_host ) { view[i] = i; }; - ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 ); + Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, size, 1, kernel1 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -77,7 +77,7 @@ TEST( ParallelForTest, 2D_host ) { view[j] = j; }; - ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 ); + Algorithms::ParallelFor2D< Devices::Host >::exec( 0, 0, 1, size, kernel2 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -104,7 +104,7 @@ TEST( ParallelForTest, 3D_host ) { view[i] = i; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, size, 1, 1, kernel1 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -116,7 +116,7 @@ TEST( ParallelForTest, 3D_host ) { view[j] = j; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, size, 1, kernel2 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -128,7 +128,7 @@ TEST( ParallelForTest, 3D_host ) { view[k] = k; }; - ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 1, size, kernel3 ); + Algorithms::ParallelFor3D< Devices::Host >::exec( 0, 0, 0, 1, 
1, size, kernel3 ); if( a != expected ) { for (int i = 0; i < size; i++) @@ -158,7 +158,7 @@ void test_1D_cuda() { view[i] = i; }; - ParallelFor< Devices::Cuda >::exec( 0, size, kernel ); + Algorithms::ParallelFor< Devices::Cuda >::exec( 0, size, kernel ); ArrayHost ah; ah = a; @@ -194,7 +194,7 @@ void test_2D_cuda() { view[i] = i; }; - ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 ); + Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, size, 1, kernel1 ); ArrayHost ah; ah = a; @@ -208,7 +208,7 @@ void test_2D_cuda() { view[j] = j; }; - ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 ); + Algorithms::ParallelFor2D< Devices::Cuda >::exec( 0, 0, 1, size, kernel2 ); ah = a; if( ah != expected ) { @@ -243,7 +243,7 @@ void test_3D_cuda() { view[i] = i; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, size, 1, 1, kernel1 ); ArrayHost ah; ah = a; @@ -257,7 +257,7 @@ void test_3D_cuda() { view[j] = j; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, size, 1, kernel2 ); ah = a; if( ah != expected ) { @@ -270,7 +270,7 @@ void test_3D_cuda() { view[k] = k; }; - ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 ); + Algorithms::ParallelFor3D< Devices::Cuda >::exec( 0, 0, 0, 1, 1, size, kernel3 ); ah = a; if( ah != expected ) { @@ -287,4 +287,4 @@ TEST( ParallelForTest, 3D_cuda ) #endif #endif -#include "main.h" +#include "../main.h" diff --git a/src/UnitTests/AllocatorsTest.h b/src/UnitTests/AllocatorsTest.h index 5434a495085d38ad345f0ece4f530aba2184a6ba..16438e082be4c8cf380f7bd2e2e935c0761e2c2f 100644 --- a/src/UnitTests/AllocatorsTest.h +++ b/src/UnitTests/AllocatorsTest.h @@ -15,7 +15,7 @@ #include <TNL/Allocators/Cuda.h> #include <TNL/Allocators/CudaHost.h> #include <TNL/Allocators/CudaManaged.h> -#include 
<TNL/Containers/Algorithms/ArrayOperations.h> +#include <TNL/Algorithms/MemoryOperations.h> #include "gtest/gtest.h" @@ -83,7 +83,7 @@ TYPED_TEST( AllocatorsTest, CudaManaged ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); // check values on the host @@ -103,7 +103,7 @@ TYPED_TEST( AllocatorsTest, Cuda ) ASSERT_NE( data, nullptr ); // set data on the device - Containers::Algorithms::ArrayOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); + Algorithms::MemoryOperations< Devices::Cuda >::set( data, (ValueType) 0, ARRAY_TEST_SIZE ); ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); allocator.deallocate( data, ARRAY_TEST_SIZE ); diff --git a/src/UnitTests/AssertCudaTest.cu b/src/UnitTests/AssertCudaTest.cu index 9d4865eb9c8ba3b7aaa6f8bf5506fabe17be4483..8f42da6772dc2b4b6dbe3f185ca96bc8efde9290 100644 --- a/src/UnitTests/AssertCudaTest.cu +++ b/src/UnitTests/AssertCudaTest.cu @@ -13,7 +13,7 @@ #endif #include <TNL/Assert.h> -#include <TNL/Devices/Cuda.h> +#include <TNL/Cuda/CheckDevice.h> #include <TNL/Exceptions/CudaRuntimeError.h> #ifdef HAVE_GTEST diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index a9fdeab528afa901f91eb5dbd23a35ed1bf018ef..6bfae47e1604e174272b19c1a0e5cfbe109c7dcc 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -44,19 +44,14 @@ ADD_EXECUTABLE( ObjectTest ObjectTest.cpp ) TARGET_COMPILE_OPTIONS( ObjectTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ObjectTest ${GTEST_BOTH_LIBRARIES} ) -if( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( ParallelForTest ParallelForTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) -else() - ADD_EXECUTABLE( ParallelForTest ParallelForTest.cpp ) - 
TARGET_COMPILE_OPTIONS( ParallelForTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ParallelForTest ${GTEST_BOTH_LIBRARIES} ) -endif() - ADD_EXECUTABLE( TimerTest TimerTest.cpp ) TARGET_COMPILE_OPTIONS( TimerTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( TimerTest ${GTEST_BOTH_LIBRARIES} ) +ADD_EXECUTABLE( TypeInfoTest TypeInfoTest.cpp ) +TARGET_COMPILE_OPTIONS( TypeInfoTest PRIVATE ${CXX_TESTS_FLAGS} ) +TARGET_LINK_LIBRARIES( TypeInfoTest ${GTEST_BOTH_LIBRARIES} ) + ADD_TEST( AssertTest ${EXECUTABLE_OUTPUT_PATH}/AssertTest${CMAKE_EXECUTABLE_SUFFIX} ) if( BUILD_CUDA ) ADD_TEST( AssertCudaTest ${EXECUTABLE_OUTPUT_PATH}/AssertCudaTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -65,5 +60,5 @@ ADD_TEST( AllocatorsTest ${EXECUTABLE_OUTPUT_PATH}/AllocatorsTest${CMAKE_EXECUTA ADD_TEST( FileTest ${EXECUTABLE_OUTPUT_PATH}/FileTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( ParallelForTest ${EXECUTABLE_OUTPUT_PATH}/ParallelForTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( TimerTest ${EXECUTABLE_OUTPUT_PATH}/TimerTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( TypeInfoTest ${EXECUTABLE_OUTPUT_PATH}/TypeInfoTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cpp b/src/UnitTests/Containers/ArrayOperationsTest.cpp deleted file mode 100644 index c499a61b2cb1b50eebf9fc7fedacf56b9c7cb68a..0000000000000000000000000000000000000000 --- a/src/UnitTests/Containers/ArrayOperationsTest.cpp +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - ArrayOperationsTest.cpp - description - ------------------- - begin : Jul 15, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - 
-#include "ArrayOperationsTest.h" diff --git a/src/UnitTests/Containers/ArrayOperationsTest.cu b/src/UnitTests/Containers/ArrayOperationsTest.cu deleted file mode 100644 index 497b40f178e5030838f4d871edfb1bed61a14fde..0000000000000000000000000000000000000000 --- a/src/UnitTests/Containers/ArrayOperationsTest.cu +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - ArrayOperationsTest.cu - description - ------------------- - begin : Jul 16, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "ArrayOperationsTest.h" diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index a18471a4a8c24447b48fe673627471aa28e639f6..ef3119365f50444d89154e487a65a12464849062 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -15,6 +15,7 @@ #include <TNL/Containers/Array.h> #include <TNL/Containers/Vector.h> +#include <TNL/Pointers/DevicePointer.h> #include "gtest/gtest.h" @@ -45,11 +46,6 @@ struct MyData // operator used in tests, not necessary for Array to work template< typename T > bool operator==( T v ) const { return data == v; } - - static String getType() - { - return String( "MyData" ); - } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) @@ -101,27 +97,6 @@ using ArrayTypes = ::testing::Types< Array< float, Devices::Cuda, long >, Array< double, Devices::Cuda, long >, Array< MyData, Devices::Cuda, long > -#endif -#ifdef HAVE_MIC - , - Array< int, Devices::MIC, short >, - Array< long, Devices::MIC, short >, - Array< float, Devices::MIC, short >, - Array< double, Devices::MIC, short >, - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, short >, - Array< int, Devices::MIC, int >, - Array< long, Devices::MIC, int >, - Array< float, 
Devices::MIC, int >, - Array< double, Devices::MIC, int >, - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, int >, - Array< int, Devices::MIC, long >, - Array< long, Devices::MIC, long >, - Array< float, Devices::MIC, long >, - Array< double, Devices::MIC, long > - // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, long > #endif // all array tests should also work with Vector @@ -136,11 +111,6 @@ using ArrayTypes = ::testing::Types< Vector< float, Devices::Cuda, long >, Vector< double, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - Vector< float, Devices::MIC, long >, - Vector< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( ArrayTest, ArrayTypes ); @@ -343,9 +313,9 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #ifdef HAVE_CUDA u.setSize( 10 ); using ArrayType = Array< Value, Devices::Cuda, Index >; - ArrayType* kernel_u = Devices::Cuda::passToDevice( u ); - testSetGetElementKernel<<< 1, 16 >>>( kernel_u ); - Devices::Cuda::freeFromDevice( kernel_u ); + Pointers::DevicePointer< ArrayType > kernel_u( u ); + testSetGetElementKernel<<< 1, 16 >>>( &kernel_u.template modifyData< Devices::Cuda >() ); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; for( int i = 0; i < 10; i++ ) { EXPECT_EQ( u.getElement( i ), i ); @@ -353,14 +323,6 @@ void testArrayElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #endif } -template< typename Value, typename Index > -void testArrayElementwiseAccess( Array< Value, Devices::MIC, Index >&& u ) -{ -#ifdef HAVE_MIC - // TODO -#endif -} - TYPED_TEST( ArrayTest, elementwiseAccess ) { using ArrayType = typename TestFixture::ArrayType; @@ -405,9 +367,10 @@ TYPED_TEST( ArrayTest, containsOnlyValue ) TYPED_TEST( ArrayTest, comparisonOperator ) { using ArrayType = typename TestFixture::ArrayType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType u( 10 ), v( 10 ), w( 10 ); - 
typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i ++ ) { u.setElement( i, i ); u_host.setElement( i, i ); @@ -460,9 +423,10 @@ TYPED_TEST( ArrayTest, comparisonOperatorWithDifferentType ) TYPED_TEST( ArrayTest, assignmentOperator ) { using ArrayType = typename TestFixture::ArrayType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType u( 10 ), v( 10 ); - typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i++ ) { u.setElement( i, i ); u_host.setElement( i, i ); @@ -489,10 +453,12 @@ template< typename ArrayType, typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type > void testArrayAssignmentWithDifferentType() { + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + ArrayType u( 10 ); Array< short, typename ArrayType::DeviceType, short > v( 10 ); Array< short, Devices::Host, short > v_host( 10 ); - typename ArrayType::HostType u_host( 10 ); + HostArrayType u_host( 10 ); for( int i = 0; i < 10; i++ ) { u.setElement( i, i ); u_host.setElement( i, i ); diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 35344eecf1041725b33c84664b8bca2769f3d525..e5a9d5a2091781669d81391f89e9097c4f0b36b5 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -41,11 +41,6 @@ struct MyData // operator used in tests, not necessary for Array to work template< typename T > bool operator==( T v ) const { return data == v; } - - static String getType() - { - return String( "MyData" ); - } }; std::ostream& operator<<( std::ostream& str, const MyData& v ) @@ -98,27 +93,6 @@ using ViewTypes = ::testing::Types< ArrayView< float, Devices::Cuda, long >, ArrayView< double, Devices::Cuda, long >, ArrayView< MyData, Devices::Cuda, long 
> -#endif -#ifdef HAVE_MIC - , - ArrayView< int, Devices::MIC, short >, - ArrayView< long, Devices::MIC, short >, - ArrayView< float, Devices::MIC, short >, - ArrayView< double, Devices::MIC, short >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, short >, - ArrayView< int, Devices::MIC, int >, - ArrayView< long, Devices::MIC, int >, - ArrayView< float, Devices::MIC, int >, - ArrayView< double, Devices::MIC, int >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, int >, - ArrayView< int, Devices::MIC, long >, - ArrayView< long, Devices::MIC, long >, - ArrayView< float, Devices::MIC, long >, - ArrayView< double, Devices::MIC, long >, - // TODO: MyData does not work on MIC -// ArrayView< MyData, Devices::MIC, long >, #endif // all ArrayView tests should also work with VectorView @@ -133,11 +107,6 @@ using ViewTypes = ::testing::Types< VectorView< float, Devices::Cuda, long >, VectorView< double, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - VectorView< float, Devices::MIC, long >, - VectorView< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( ArrayViewTest, ViewTypes ); @@ -289,14 +258,6 @@ void testArrayViewElementwiseAccess( Array< Value, Devices::Cuda, Index >&& u ) #endif } -template< typename Value, typename Index > -void testArrayViewElementwiseAccess( Array< Value, Devices::MIC, Index >&& u ) -{ -#ifdef HAVE_MIC - // TODO -#endif -} - TYPED_TEST( ArrayViewTest, elementwiseAccess ) { using ArrayType = typename TestFixture::ArrayType; @@ -375,9 +336,10 @@ TYPED_TEST( ArrayViewTest, comparisonOperator ) { using ArrayType = typename TestFixture::ArrayType; using ViewType = typename TestFixture::ViewType; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; ArrayType a( 10 ), b( 10 ); - typename ArrayType::HostType a_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i ++ ) { a.setElement( i, i ); a_host.setElement( i, i 
); @@ -450,9 +412,11 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) using ArrayType = typename TestFixture::ArrayType; using ViewType = typename TestFixture::ViewType; using ConstViewType = VectorView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + using HostViewType = typename HostArrayType::ViewType; ArrayType a( 10 ), b( 10 ); - typename ArrayType::HostType a_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i++ ) { a.setElement( i, i ); a_host.setElement( i, i ); @@ -460,7 +424,7 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) ViewType u = a.getView(); ViewType v = b.getView(); - typename ViewType::HostType u_host = a_host.getView(); + HostViewType u_host = a_host.getView(); v.setValue( 0 ); v = u; @@ -496,21 +460,25 @@ template< typename ArrayType, typename = typename std::enable_if< std::is_arithmetic< typename ArrayType::ValueType >::value >::type > void testArrayAssignmentWithDifferentType() { + using HostArrayType = typename ArrayType::template Self< typename ArrayType::ValueType, Devices::Sequential >; + ArrayType a( 10 ); Array< short, typename ArrayType::DeviceType, short > b( 10 ); - Array< short, Devices::Host, short > b_host( 10 ); - typename ArrayType::HostType a_host( 10 ); + Array< short, Devices::Sequential, short > b_host( 10 ); + HostArrayType a_host( 10 ); for( int i = 0; i < 10; i++ ) { a.setElement( i, i ); a_host.setElement( i, i ); } using ViewType = ArrayView< typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; + using HostViewType = typename ViewType::template Self< typename ViewType::ValueType, Devices::Sequential >; ViewType u = a.getView(); - typename ViewType::HostType u_host( a_host ); + HostViewType u_host( a_host ); using ShortViewType = ArrayView< short, typename ArrayType::DeviceType, short >; + 
using HostShortViewType = ArrayView< short, Devices::Sequential, short >; ShortViewType v( b ); - typename ShortViewType::HostType v_host( b_host ); + HostShortViewType v_host( b_host ); v.setValue( 0 ); v = u; diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index c8cd88af9f3ae8df5109c439aba858bc059bca2d..6ff7570dd3b1a62051ce295180205ddac0675156 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -1,17 +1,3 @@ -ADD_EXECUTABLE( ListTest ListTest.cpp ) -TARGET_COMPILE_OPTIONS( ListTest PRIVATE ${CXX_TESTS_FLAGS} ) -TARGET_LINK_LIBRARIES( ListTest ${GTEST_BOTH_LIBRARIES} ) - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( ArrayOperationsTest ArrayOperationsTest.cpp ) - TARGET_COMPILE_OPTIONS( ArrayOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( ArrayOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ENDIF( BUILD_CUDA ) - ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} ) @@ -70,16 +56,6 @@ IF( BUILD_CUDA ) TARGET_LINK_LIBRARIES( VectorVerticalOperationsTestCuda ${GTEST_BOTH_LIBRARIES} ) ENDIF( BUILD_CUDA ) -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu - OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp ) - TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} ) -ENDIF( BUILD_CUDA ) - ADD_EXECUTABLE( StaticArrayTest StaticArrayTest.cpp ) TARGET_COMPILE_OPTIONS( StaticArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( 
StaticArrayTest ${GTEST_BOTH_LIBRARIES} ) @@ -93,8 +69,6 @@ TARGET_COMPILE_OPTIONS( StaticVectorOperationsTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( StaticVectorOperationsTest ${GTEST_BOTH_LIBRARIES} ) -ADD_TEST( ListTest ${EXECUTABLE_OUTPUT_PATH}/ListTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( ArrayOperationsTest ${EXECUTABLE_OUTPUT_PATH}/ArrayOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -113,7 +87,6 @@ IF( BUILD_CUDA ) ADD_TEST( VectorUnaryOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorUnaryOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( VectorVerticalOperationsTestCuda ${EXECUTABLE_OUTPUT_PATH}/VectorVerticalOperationsTestCuda${CMAKE_EXECUTABLE_SUFFIX} ) ENDIF() -ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( StaticVectorOperationsTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorOperationsTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Containers/DistributedVectorTest.h b/src/UnitTests/Containers/DistributedVectorTest.h index b1844be3bcabfe421ea5e2b8ace54425ef75be2a..2a1834f318fa616d25a77ccccbdb68bb1cc016a4 100644 --- a/src/UnitTests/Containers/DistributedVectorTest.h +++ b/src/UnitTests/Containers/DistributedVectorTest.h @@ -42,18 +42,19 @@ protected: using DistributedVectorType = DistributedVector; using VectorViewType = typename DistributedVectorType::LocalViewType; using DistributedVectorView = Containers::DistributedVectorView< RealType, DeviceType, IndexType, CommunicatorType >; + using HostDistributedVectorType 
= typename DistributedVectorType::template Self< RealType, Devices::Sequential >; const typename CommunicatorType::CommunicationGroup group = CommunicatorType::AllGroup; DistributedVectorType v; DistributedVectorView v_view; - typename DistributedVectorType::HostType v_host; + HostDistributedVectorType v_host; const int rank = CommunicatorType::GetRank(group); const int nproc = CommunicatorType::GetSize(group); // should be small enough to have fast tests, but large enough to test - // prefix-sum with multiple CUDA grids + // scan with multiple CUDA grids const int globalSize = 10000 * nproc; DistributedVectorTest() @@ -79,7 +80,7 @@ using DistributedVectorTypes = ::testing::Types< TYPED_TEST_SUITE( DistributedVectorTest, DistributedVectorTypes ); -TYPED_TEST( DistributedVectorTest, prefixSum ) +TYPED_TEST( DistributedVectorTest, scan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -96,21 +97,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -118,21 +119,21 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - 
v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -146,7 +147,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -154,7 +155,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -162,7 +163,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -171,7 +172,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -179,7 +180,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< 
Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -187,7 +188,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -198,7 +199,7 @@ TYPED_TEST( DistributedVectorTest, prefixSum ) } } -TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) +TYPED_TEST( DistributedVectorTest, exclusiveScan ) { using RealType = typename TestFixture::DistributedVectorType::RealType; using DeviceType = typename TestFixture::DistributedVectorType::DeviceType; @@ -215,21 +216,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -237,21 +238,21 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template 
scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -265,7 +266,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -273,7 +274,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -281,7 +282,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() 
), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -290,7 +291,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -298,7 +299,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) @@ -306,7 +307,7 @@ TYPED_TEST( DistributedVectorTest, exclusivePrefixSum ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = localRange.getBegin(); i < localRange.getEnd(); i++ ) diff --git a/src/UnitTests/Containers/ListTest.cpp b/src/UnitTests/Containers/ListTest.cpp deleted file mode 100644 index f24e650b43b681dc82b9571800e0c3201b202504..0000000000000000000000000000000000000000 --- a/src/UnitTests/Containers/ListTest.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/*************************************************************************** - ListTest.cpp - description - ------------------- - begin : Feb 15, 2014 - copyright : (C) 2014 by Tomas Oberhuber et al. 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifdef HAVE_GTEST -#include <gtest/gtest.h> - -#include <TNL/Containers/List.h> - -using namespace TNL; -using namespace TNL::Containers; - -// minimal custom data structure usable as ValueType in List -struct MyData -{ - double data; - - __cuda_callable__ - MyData() : data(0) {} - - template< typename T > - __cuda_callable__ - MyData( T v ) : data(v) {} - - __cuda_callable__ - bool operator==( const MyData& v ) const { return data == v.data; } - - __cuda_callable__ - bool operator!=( const MyData& v ) const { return data != v.data; } - - static String getType() - { - return String( "MyData" ); - } -}; - -std::ostream& operator<<( std::ostream& str, const MyData& v ) -{ - return str << v.data; -} - - -// test fixture for typed tests -template< typename List > -class ListTest : public ::testing::Test -{ -protected: - using ListType = List; -}; - -// types for which ListTest is instantiated -using ListTypes = ::testing::Types< - List< short >, - List< int >, - List< long >, - List< float >, - List< double >, - List< MyData > ->; - -TYPED_TEST_SUITE( ListTest, ListTypes ); - - -TYPED_TEST( ListTest, constructor ) -{ - using ListType = typename TestFixture::ListType; - using ValueType = typename ListType::ValueType; - - ListType list; - EXPECT_TRUE( list.isEmpty() ); - EXPECT_EQ( list.getSize(), 0 ); - - list.Append( ( ValueType ) 0 ); - EXPECT_EQ( list.getSize(), 1 ); - - ListType copy( list ); - list.Append( ( ValueType ) 0 ); - EXPECT_EQ( list.getSize(), 2 ); - EXPECT_EQ( copy.getSize(), 1 ); - EXPECT_EQ( copy[ 0 ], list[ 0 ] ); -} - -TYPED_TEST( ListTest, operations ) -{ - using ListType = typename TestFixture::ListType; - using ValueType = typename ListType::ValueType; - - ListType a, b; - - a.Append( (ValueType) 0 ); - a.Append( (ValueType) 1 ); - a.Prepend( (ValueType) 2 ); - a.Insert( (ValueType) 
3, 1 ); - EXPECT_EQ( a.getSize(), 4 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], (ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - - b = a; - EXPECT_EQ( b.getSize(), 4 ); - EXPECT_EQ( a, b ); - - b.Insert( ( ValueType ) 4, 4 ); - EXPECT_NE( a, b ); - EXPECT_EQ( b[ 4 ], (ValueType) 4 ); - - a.AppendList( b ); - EXPECT_EQ( a.getSize(), 9 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], (ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - EXPECT_EQ( a[ 4 ], (ValueType) 2 ); - EXPECT_EQ( a[ 5 ], (ValueType) 3 ); - EXPECT_EQ( a[ 6 ], (ValueType) 0 ); - EXPECT_EQ( a[ 7 ], (ValueType) 1 ); - EXPECT_EQ( a[ 8 ], (ValueType) 4 ); - - a.PrependList( b ); - EXPECT_EQ( a.getSize(), 14 ); - EXPECT_EQ( a[ 0 ], (ValueType) 2 ); - EXPECT_EQ( a[ 1 ], (ValueType) 3 ); - EXPECT_EQ( a[ 2 ], (ValueType) 0 ); - EXPECT_EQ( a[ 3 ], (ValueType) 1 ); - EXPECT_EQ( a[ 4 ], (ValueType) 4 ); - EXPECT_EQ( a[ 5 ], (ValueType) 2 ); - EXPECT_EQ( a[ 6 ], (ValueType) 3 ); - EXPECT_EQ( a[ 7 ], (ValueType) 0 ); - EXPECT_EQ( a[ 8 ], (ValueType) 1 ); - EXPECT_EQ( a[ 9 ], (ValueType) 2 ); - EXPECT_EQ( a[ 10 ], (ValueType) 3 ); - EXPECT_EQ( a[ 11 ], (ValueType) 0 ); - EXPECT_EQ( a[ 12 ], (ValueType) 1 ); - EXPECT_EQ( a[ 13 ], (ValueType) 4 ); -} -#endif - - -#include "../main.h" diff --git a/src/UnitTests/Containers/VectorBinaryOperationsTest.h b/src/UnitTests/Containers/VectorBinaryOperationsTest.h index 93283483c8a06e89c6e9dfceee81f82e391c5955..bae5ce5f389e703711df651084994c8f39775116 100644 --- a/src/UnitTests/Containers/VectorBinaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorBinaryOperationsTest.h @@ -595,8 +595,11 @@ TYPED_TEST( VectorBinaryOperationsTest, comparisonOnDifferentDevices ) { SETUP_BINARY_TEST_ALIASES; - typename TestFixture::RightVector::HostType _R1_h; _R1_h = this->_R1; - typename TestFixture::Right::HostType R1_h( _R1_h ); + using RightHostVector = typename 
TestFixture::RightVector::Self< typename TestFixture::RightVector::RealType, Devices::Sequential >; + using RightHost = typename TestFixture::Right::Self< typename TestFixture::Right::RealType, Devices::Sequential >; + + RightHostVector _R1_h; _R1_h = this->_R1; + RightHost R1_h( _R1_h ); // L1 and L2 are device vectors EXPECT_EQ( L1, R1_h ); diff --git a/src/UnitTests/Containers/VectorHelperFunctions.h b/src/UnitTests/Containers/VectorHelperFunctions.h index b1a596c6a4502effce2f9e0a593edf3a25e4ea23..4e8c64faee6b906076bfcb55aabab9c46960eaf1 100644 --- a/src/UnitTests/Containers/VectorHelperFunctions.h +++ b/src/UnitTests/Containers/VectorHelperFunctions.h @@ -9,7 +9,8 @@ void setLinearSequence( Vector& deviceVector ) #ifdef STATIC_VECTOR Vector a; #else - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #endif #ifdef DISTRIBUTED_VECTOR @@ -34,7 +35,8 @@ void setConstantSequence( Vector& deviceVector, template< typename Vector > void setOscilatingLinearSequence( Vector& deviceVector ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); for( int i = 0; i < a.getSize(); i++ ) a[ i ] = i % 30 - 15; @@ -45,7 +47,8 @@ template< typename Vector > void setOscilatingConstantSequence( Vector& deviceVector, typename Vector::RealType v ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); for( int i = 0; i < a.getSize(); i++ ) a[ i ] = TNL::sign( i % 30 - 15 ); @@ -55,7 +58,8 @@ void setOscilatingConstantSequence( Vector& deviceVector, template< typename Vector > void setNegativeLinearSequence( Vector& deviceVector ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< 
typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #ifdef DISTRIBUTED_VECTOR for( int i = 0; i < a.getLocalView().getSize(); i++ ) { @@ -76,7 +80,8 @@ void setOscilatingSequence( Vector& deviceVector, #ifdef STATIC_VECTOR Vector a; #else - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Host >; + HostVector a; a.setLike( deviceVector ); #endif #ifdef DISTRIBUTED_VECTOR diff --git a/src/UnitTests/Containers/VectorPrefixSumTest.h b/src/UnitTests/Containers/VectorPrefixSumTest.h index be295001ade53ab0be2b1afd9ce8950b333cbf4c..7f2151c5ef15429d549020d076e4fd99681c3b8f 100644 --- a/src/UnitTests/Containers/VectorPrefixSumTest.h +++ b/src/UnitTests/Containers/VectorPrefixSumTest.h @@ -17,13 +17,14 @@ // and large enough to require multiple CUDA blocks for reduction constexpr int VECTOR_TEST_SIZE = 10000; -TYPED_TEST( VectorTest, prefixSum ) +TYPED_TEST( VectorTest, scan ) { using VectorType = typename TestFixture::VectorType; using ViewType = typename TestFixture::ViewType; using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; + using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >; const int size = VECTOR_TEST_SIZE; // FIXME: tests should work in all cases @@ -32,25 +33,25 @@ TYPED_TEST( VectorTest, prefixSum ) VectorType v( size ); ViewType v_view( v ); - typename VectorType::HostType v_host( size ); + HostVectorType v_host( size ); setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); v_host 
= v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -58,21 +59,21 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v_view; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i + 1 ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i + 1)) / 2 ) << "i = " << i; @@ -86,7 +87,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 0 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -94,7 +95,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -102,7 +103,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v.prefixSum(); + v.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -111,7 +112,7 @@ TYPED_TEST( VectorTest, prefixSum ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -119,7 +120,7 @@ TYPED_TEST( VectorTest, prefixSum ) setConstantSequence( v, 1 ); v_host = -1; 
- v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v_view; for( int i = 0; i < size; i++ ) @@ -127,7 +128,7 @@ TYPED_TEST( VectorTest, prefixSum ) setLinearSequence( v ); v_host = -1; - v_view.prefixSum(); + v_view.scan(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Inclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -145,6 +146,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) using RealType = typename VectorType::RealType; using DeviceType = typename VectorType::DeviceType; using IndexType = typename VectorType::IndexType; + using HostVectorType = typename VectorType::template Self< RealType, Devices::Sequential >; const int size = VECTOR_TEST_SIZE; // FIXME: tests should work in all cases @@ -154,25 +156,25 @@ TYPED_TEST( VectorTest, exclusiveScan ) VectorType v; v.setSize( size ); ViewType v_view( v ); - typename VectorType::HostType v_host( size ); + HostVectorType v_host( size ); setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -180,21 +182,21 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 ); v_host = -1; - v_view.template prefixSum< 
Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], 0 ) << "i = " << i; setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], i ) << "i = " << i; setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); v_host = v; for( int i = 0; i < size; i++ ) EXPECT_EQ( v_host[ i ], (i * (i - 1)) / 2 ) << "i = " << i; @@ -208,7 +210,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 0 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -216,7 +218,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -224,7 +226,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v.template prefixSum< Algorithms::ScanType::Exclusive >(); + v.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -233,7 +235,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) // test views setConstantSequence( v, 0 ); v_host = -1; - 
v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -241,7 +243,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setConstantSequence( v, 1 ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -249,7 +251,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) setLinearSequence( v ); v_host = -1; - v_view.template prefixSum< Algorithms::ScanType::Exclusive >(); + v_view.template scan< Algorithms::ScanType::Exclusive >(); EXPECT_GT( ( Algorithms::CudaScanKernelLauncher< Algorithms::ScanType::Exclusive, RealType, IndexType >::gridsCount() ), 1 ); v_host = v; for( int i = 0; i < size; i++ ) @@ -260,7 +262,7 @@ TYPED_TEST( VectorTest, exclusiveScan ) } } -// TODO: test prefix sum with custom begin and end parameters +// TODO: test scan with custom begin and end parameters template< typename FlagsView > diff --git a/src/UnitTests/Containers/VectorTestSetup.h b/src/UnitTests/Containers/VectorTestSetup.h index 5c342dced87f713824344cb43ee1c9922dbf0ef6..c8ec42bea482a1691fca97efecb8342985f8207d 100644 --- a/src/UnitTests/Containers/VectorTestSetup.h +++ b/src/UnitTests/Containers/VectorTestSetup.h @@ -76,21 +76,6 @@ using VectorTypes = ::testing::Types< //Vector< Quad< float >, Devices::Cuda, long >, //Vector< Quad< double >, Devices::Cuda, long > #endif -#ifdef HAVE_MIC - , - Vector< int, Devices::MIC, short >, - Vector< long, Devices::MIC, short >, - Vector< float, Devices::MIC, short >, - Vector< double, Devices::MIC, short >, - Vector< int, Devices::MIC, int >, - Vector< long, 
Devices::MIC, int >, - Vector< float, Devices::MIC, int >, - Vector< double, Devices::MIC, int >, - Vector< int, Devices::MIC, long >, - Vector< long, Devices::MIC, long >, - Vector< float, Devices::MIC, long >, - Vector< double, Devices::MIC, long > -#endif >; TYPED_TEST_SUITE( VectorTest, VectorTypes ); diff --git a/src/UnitTests/Containers/VectorUnaryOperationsTest.h b/src/UnitTests/Containers/VectorUnaryOperationsTest.h index 1224042532b2c55eb33757e61fe603219840aa61..827147cd53552209f9875370b93ca75d7514728a 100644 --- a/src/UnitTests/Containers/VectorUnaryOperationsTest.h +++ b/src/UnitTests/Containers/VectorUnaryOperationsTest.h @@ -168,14 +168,16 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ using ExpectedVector = typename TestFixture::template Vector< decltype(function(RealType{})) >; \ + using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ constexpr int size = _size; \ using CommunicatorType = typename VectorOrView::CommunicatorType; \ const auto group = CommunicatorType::AllGroup; \ using LocalRangeType = typename VectorOrView::LocalRangeType; \ const LocalRangeType localRange = Partitioner< typename VectorOrView::IndexType, CommunicatorType >::splitRange( size, group ); \ \ - typename VectorType::HostType _V1h; \ - typename ExpectedVector::HostType expected_h; \ + HostVector _V1h; \ + HostExpectedVector expected_h; \ _V1h.setDistribution( localRange, size, group ); \ expected_h.setDistribution( localRange, size, group ); \ \ @@ -209,10 +211,12 @@ TYPED_TEST_SUITE( VectorUnaryOperationsTest, VectorTypes ); using VectorOrView = typename TestFixture::VectorOrView; \ using RealType = typename VectorType::RealType; \ using ExpectedVector = typename TestFixture::template Vector< 
decltype(function(RealType{})) >; \ + using HostVector = typename VectorType::template Self< RealType, Devices::Host >; \ + using HostExpectedVector = typename ExpectedVector::template Self< decltype(function(RealType{})), Devices::Host >; \ constexpr int size = _size; \ \ - typename VectorType::HostType _V1h( size ); \ - typename ExpectedVector::HostType expected_h( size ); \ + HostVector _V1h( size ); \ + HostExpectedVector expected_h( size ); \ \ const double h = (double) (end - begin) / size; \ for( int i = 0; i < size; i++ ) \ @@ -254,8 +258,8 @@ void expect_vectors_near( const Left& _v1, const Right& _v2 ) using LeftVector = Vector< LeftNonConstReal, typename Left::DeviceType, typename Left::IndexType >; using RightVector = Vector< RightNonConstReal, typename Right::DeviceType, typename Right::IndexType >; #endif - using LeftHostVector = typename LeftVector::HostType; - using RightHostVector = typename RightVector::HostType; + using LeftHostVector = typename LeftVector::template Self< LeftNonConstReal, Devices::Sequential >; + using RightHostVector = typename RightVector::template Self< RightNonConstReal, Devices::Sequential >; // first evaluate expressions LeftVector v1; v1 = _v1; diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h index 2faf5ba040b71d8003ceb7629a1b9fa7a80870ef..04afb91a4073750ba5583465b85eeda2239f0628 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_1D_test.h @@ -139,7 +139,7 @@ void test_helper_setValue( DistributedArray& array, BufferView& buffer_view ) { buffer_view[ i - localRange.getBegin() ] = array_view( i ); }; - ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); } TYPED_TEST( DistributedNDArray_1D_test, setValue ) @@ -224,7 
+224,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v, v_view( gi ) = gi; w_view( gi ) = 2 * gi; }; - ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); + Algorithms::ParallelFor< DeviceType >::exec( localRange.getBegin(), localRange.getEnd(), kernel ); } TYPED_TEST( DistributedNDArray_1D_test, comparisonOperators ) diff --git a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h index 33390a33c8a230d2946f54569e211a4a711713d0..17108509d037be69b78ce7a13bc54edbf37731b3 100644 --- a/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h +++ b/src/UnitTests/Containers/ndarray/DistributedNDArray_semi1D_test.h @@ -181,7 +181,7 @@ void test_helper_comparisonOperators( DistributedArray& u, DistributedArray& v, v_view( q, gi, j ) = gi; w_view( q, gi, j ) = 2 * gi; }; - ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0, + Algorithms::ParallelFor3D< DeviceType >::exec( (IndexType) 0, localRange.getBegin(), (IndexType) 0, 9, localRange.getEnd(), u.template getSize< 2 >(), kernel ); } diff --git a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu index 0a0a83dd83fae72ff0f1b5c349d81ba05ed0da65..5a0561955f85cce3fac6798cd480466e495cc181 100644 --- a/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu +++ b/src/UnitTests/Containers/ndarray/StaticNDArrayCudaTest.cu @@ -2,8 +2,9 @@ #include <TNL/Containers/NDArray.h> -#include <TNL/ParallelFor.h> +#include <TNL/Algorithms/ParallelFor.h> +using namespace TNL; using namespace TNL::Containers; using std::index_sequence; @@ -37,7 +38,7 @@ void __test_SetThroughView() }; a.setValue(0); - TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); + Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); expect_identity( 
a.getStorageArray() ); } TEST( StaticNDArrayCudaTest, SetThroughView ) @@ -68,7 +69,7 @@ void __test_CopyFromArray() }; a.setValue(0); - TNL::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); + Algorithms::ParallelFor< TNL::Devices::Cuda >::exec( 0, 1, kernel, a_view ); expect_identity( a.getStorageArray() ); } TEST( StaticNDArrayCudaTest, CopyFromArray ) diff --git a/src/UnitTests/FileTest.h b/src/UnitTests/FileTest.h index f376c60f4242922a407ede2063b420b7eb9f4b32..b9f2ee7ef1f73f65ab231583f155f138d8d10626 100644 --- a/src/UnitTests/FileTest.h +++ b/src/UnitTests/FileTest.h @@ -61,15 +61,15 @@ TEST( FileTest, WriteAndReadWithConversion ) int intData[ 3 ]; File file; ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::out | std::ios_base::trunc ) ); - file.save< double, float, Devices::Host >( doubleData, 3 ); + file.save< double, float >( doubleData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) ); - file.load< float, float, Devices::Host >( floatData, 3 ); + file.load< float, float >( floatData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( TEST_FILE_NAME, std::ios_base::in ) ); - file.load< int, float, Devices::Host >( intData, 3 ); + file.load< int, float >( intData, 3 ); ASSERT_NO_THROW( file.close() ); EXPECT_NEAR( floatData[ 0 ], 3.14159, 0.0001 ); @@ -112,9 +112,9 @@ TEST( FileTest, WriteAndReadCUDA ) File file; ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out ) ); - file.save< int, int, Devices::Cuda >( cudaIntData ); - file.save< float, float, Devices::Cuda >( cudaFloatData, 3 ); - file.save< const double, double, Devices::Cuda >( cudaConstDoubleData ); + file.save< int, int, Allocators::Cuda<int> >( cudaIntData ); + file.save< float, float, Allocators::Cuda<float> >( cudaFloatData, 3 ); + file.save< const double, double, Allocators::Cuda<const double> >( cudaConstDoubleData ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( 
file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); @@ -127,9 +127,9 @@ TEST( FileTest, WriteAndReadCUDA ) cudaMalloc( ( void** ) &newCudaIntData, sizeof( int ) ); cudaMalloc( ( void** ) &newCudaFloatData, 3 * sizeof( float ) ); cudaMalloc( ( void** ) &newCudaDoubleData, sizeof( double ) ); - file.load< int, int, Devices::Cuda >( newCudaIntData, 1 ); - file.load< float, float, Devices::Cuda >( newCudaFloatData, 3 ); - file.load< double, double, Devices::Cuda >( newCudaDoubleData, 1 ); + file.load< int, int, Allocators::Cuda<int> >( newCudaIntData, 1 ); + file.load< float, float, Allocators::Cuda<float> >( newCudaFloatData, 3 ); + file.load< double, double, Allocators::Cuda<double> >( newCudaDoubleData, 1 ); cudaMemcpy( &newIntData, newCudaIntData, sizeof( int ), @@ -172,15 +172,15 @@ TEST( FileTest, WriteAndReadCUDAWithConversion ) File file; ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::out | std::ios_base::trunc ) ); - file.save< double, float, Devices::Cuda >( cudaConstDoubleData, 3 ); + file.save< double, float, Allocators::Cuda<double> >( cudaConstDoubleData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); - file.load< float, float, Devices::Cuda >( cudaFloatData, 3 ); + file.load< float, float, Allocators::Cuda<float> >( cudaFloatData, 3 ); ASSERT_NO_THROW( file.close() ); ASSERT_NO_THROW( file.open( String( TEST_FILE_NAME ), std::ios_base::in ) ); - file.load< int, float, Devices::Cuda >( cudaIntData, 3 ); + file.load< int, float, Allocators::Cuda<int> >( cudaIntData, 3 ); ASSERT_NO_THROW( file.close() ); cudaMemcpy( floatData, diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h index 7c74e77040e8d554b9ca641d0b9fb9f1e18dc741..93673a29063db52afcabf59165ea5949471cf1bc 100644 --- a/src/UnitTests/Matrices/DistributedMatrixTest.h +++ b/src/UnitTests/Matrices/DistributedMatrixTest.h @@ -6,10 +6,22 @@ email : 
tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +#include <TNL/Communicators/MpiCommunicator.h> +#include <TNL/Communicators/NoDistrCommunicator.h> +#include <TNL/Matrices/DistributedMatrix.h> +#include <TNL/Containers/Partitioner.h> +#include <TNL/Matrices/CSR.h> + +using namespace TNL; + template< typename Vector > void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset = 0 ) { - typename Vector::HostType a; + using HostVector = typename Vector::template Self< typename Vector::RealType, TNL::Devices::Sequential >; + HostVector a; a.setLike( deviceVector ); for( int i = 0; i < a.getLocalView().getSize(); i++ ) { const auto gi = a.getLocalRange().getGlobalIndex( i ); @@ -21,8 +33,11 @@ void setLinearSequence( Vector& deviceVector, typename Vector::RealType offset = template< typename Matrix, typename RowLengths > void setMatrix( Matrix& matrix, const RowLengths& rowLengths ) { - typename Matrix::HostType hostMatrix; - typename RowLengths::HostType hostRowLengths; + using HostMatrix = Matrices::DistributedMatrix< typename Matrix::MatrixType::template Self< typename Matrix::RealType, TNL::Devices::Sequential >, typename Matrix::CommunicatorType >; + using HostRowLengths = typename RowLengths::template Self< typename RowLengths::RealType, TNL::Devices::Sequential >; + + HostMatrix hostMatrix; + HostRowLengths hostRowLengths; hostMatrix.setLike( matrix ); hostRowLengths = rowLengths; hostMatrix.setCompressedRowLengths( hostRowLengths ); @@ -36,17 +51,6 @@ void setMatrix( Matrix& matrix, const RowLengths& rowLengths ) matrix = hostMatrix; } -#ifdef HAVE_GTEST -#include <gtest/gtest.h> - -#include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Communicators/NoDistrCommunicator.h> -#include <TNL/Matrices/DistributedMatrix.h> -#include <TNL/Containers/Partitioner.h> -#include <TNL/Matrices/CSR.h> - -using namespace TNL; - /* * Light check of 
DistributedMatrix. * diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 9de7b70a281d346d80037761ae0633c09450e227..03b80259d502cf43eb05f7c6b14053aa2e4ed7d7 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -10,12 +10,6 @@ // TODO /* - * getType() ::HOW? How to test this for each format? edit string how? - * Found the mistake for Cuda instead of Devices::Cuda. Incorrect String in src/TNL/Devices/Cuda.cpp - * MISSING: indexType is missing in CSR_impl.h - * getTypeVirtual() ::TEST? This just calls getType(). - * getSerializationType() ::TEST? This just calls HostType::getType(). - * getSerializationTypeVirtual() ::TEST? This just calls getSerializationType(). * setDimensions() ::DONE * setCompressedRowLengths() ::DONE * getRowLength() ::USED! In test_SetCompressedRowLengths() to verify the test itself. diff --git a/src/UnitTests/Meshes/BoundaryTagsTest.h b/src/UnitTests/Meshes/BoundaryTagsTest.h index b7eccf5f24b16927a4cd211b876f24bd5d9512ed..6bc07adabac4a0e1470c13238acf947f1b97f8fb 100644 --- a/src/UnitTests/Meshes/BoundaryTagsTest.h +++ b/src/UnitTests/Meshes/BoundaryTagsTest.h @@ -37,7 +37,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ); const RealType width( 1.0 ), height( 1.0 ); diff --git a/src/UnitTests/Meshes/CMakeLists.txt b/src/UnitTests/Meshes/CMakeLists.txt index c71bde352dc80832dc9dce44a896905372635579..91bf37215b772df499a4ecc7d32cd4fdfe335f05 100644 --- a/src/UnitTests/Meshes/CMakeLists.txt +++ 
b/src/UnitTests/Meshes/CMakeLists.txt @@ -10,6 +10,10 @@ if( ${BUILD_CUDA} AND ${CUDA_VERSION_MAJOR} GREATER_EQUAL 9 ) OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cu + OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} ) @@ -18,6 +22,10 @@ else() TARGET_COMPILE_OPTIONS( MeshTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MeshTraverserTest MeshTraverserTest.cpp ) + TARGET_COMPILE_OPTIONS( MeshTraverserTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MeshTraverserTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MeshOrderingTest MeshOrderingTest.cpp ) TARGET_COMPILE_OPTIONS( MeshOrderingTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( MeshOrderingTest ${GTEST_BOTH_LIBRARIES} ) @@ -30,6 +38,7 @@ TARGET_LINK_LIBRARIES( MeshEntityTest ${GTEST_BOTH_LIBRARIES} ) ADD_TEST( BoundaryTagsTest ${EXECUTABLE_OUTPUT_PATH}/BoundaryTagsTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshTest ${EXECUTABLE_OUTPUT_PATH}/MeshTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MeshTraverserTest ${EXECUTABLE_OUTPUT_PATH}/MeshTraverserTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshOrderingTest ${EXECUTABLE_OUTPUT_PATH}/MeshOrderingTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( MeshEntityTest ${EXECUTABLE_OUTPUT_PATH}/MeshEntityTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Meshes/MeshEntityTest.h b/src/UnitTests/Meshes/MeshEntityTest.h index 5360fd6a8b9151972c798f8f2abb97bf6e31d6eb..235150d9d26748a7b95af60fb79471ee8e3986da 100644 --- a/src/UnitTests/Meshes/MeshEntityTest.h +++ b/src/UnitTests/Meshes/MeshEntityTest.h @@ -114,7 +114,8 @@ TEST( MeshEntityTest, VertexMeshEntityTest ) using VertexMeshEntityType = 
TestMeshEntity< TestEdgeMeshConfig, typename EdgeMeshEntityType::SubentityTraits< 0 >::SubentityTopology >; using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); VertexMeshEntityType vertexEntity; PointType point; @@ -131,7 +132,8 @@ TEST( MeshEntityTest, EdgeMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * @@ -211,7 +213,8 @@ TEST( MeshEntityTest, TriangleMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the same situation as in the test above @@ -293,7 +296,8 @@ TEST( MeshEntityTest, TetrahedronMeshEntityTest ) static_assert( EdgeMeshEntityType::SubentityTraits< 0 >::storageEnabled, "Testing edge entity does not store vertices as required." 
); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); /**** * We set-up similar situation as above but with @@ -457,7 +461,8 @@ TEST( MeshEntityTest, TwoTrianglesMeshEntityTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the following situation @@ -647,7 +652,8 @@ TEST( MeshEntityTest, OneTriangleComparisonTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." ); using PointType = typename VertexMeshEntityType::PointType; - EXPECT_EQ( PointType::getType(), ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); PointType point0( 0.0, 0.0 ), point1( 1.0, 0.0 ), diff --git a/src/UnitTests/Meshes/MeshTest.h b/src/UnitTests/Meshes/MeshTest.h index 352a2d791395cd16ce8b2afeff628461abe0b27c..5c95221ed8ad611f61265209fcc5d25b7cd5bb59 100644 --- a/src/UnitTests/Meshes/MeshTest.h +++ b/src/UnitTests/Meshes/MeshTest.h @@ -180,7 +180,8 @@ TEST( MeshTest, TwoTrianglesTest ) static_assert( VertexMeshEntityType::SuperentityTraits< 1 >::storageEnabled, "Testing vertex entity does not store edges as required." 
); using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); /**** * We set-up the following situation @@ -289,7 +290,8 @@ TEST( MeshTest, TetrahedronsTest ) using VertexMeshEntityType = typename TetrahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); typedef Mesh< TestTetrahedronMeshConfig > TestTetrahedronMesh; TestTetrahedronMesh mesh; @@ -454,7 +456,8 @@ TEST( MeshTest, RegularMeshOfTrianglesTest ) using VertexMeshEntityType = typename TriangleMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 5 ), ySize( 5 ); const RealType width( 1.0 ), height( 1.0 ); @@ -554,7 +557,8 @@ TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 2, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ); const RealType width( 1.0 ), height( 1.0 ); @@ -652,7 +656,8 @@ TEST( MeshTest, RegularMeshOfHexahedronsTest ) using VertexMeshEntityType 
= typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; using PointType = typename VertexMeshEntityType::PointType; - ASSERT_TRUE( PointType::getType() == ( Containers::StaticVector< 3, RealType >::getType() ) ); + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 ); const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 ); diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cpp b/src/UnitTests/Meshes/MeshTraverserTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..426d5fef5de10ddc6dfdc795435c57b1c7be5b5c --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.cpp @@ -0,0 +1 @@ +#include "MeshTraverserTest.h" diff --git a/src/UnitTests/Meshes/MeshTraverserTest.cu b/src/UnitTests/Meshes/MeshTraverserTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..426d5fef5de10ddc6dfdc795435c57b1c7be5b5c --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.cu @@ -0,0 +1 @@ +#include "MeshTraverserTest.h" diff --git a/src/UnitTests/Meshes/MeshTraverserTest.h b/src/UnitTests/Meshes/MeshTraverserTest.h new file mode 100644 index 0000000000000000000000000000000000000000..b6c8208ad9e48731c45b2a6875fe722594a6ffe7 --- /dev/null +++ b/src/UnitTests/Meshes/MeshTraverserTest.h @@ -0,0 +1,434 @@ +#pragma once + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +#include <TNL/Meshes/Mesh.h> +#include <TNL/Meshes/MeshEntity.h> +#include <TNL/Meshes/DefaultConfig.h> +#include <TNL/Meshes/Topologies/Quadrilateral.h> +#include <TNL/Meshes/Topologies/Hexahedron.h> +#include <TNL/Meshes/MeshBuilder.h> +#include <TNL/Meshes/Traverser.h> + +namespace MeshTest { + +using namespace TNL; +using namespace TNL::Meshes; + +using RealType = double; +using Device = Devices::Host; +using IndexType = int; + +static const char* TEST_FILE_NAME = "test_MeshTest.tnl"; + +// FIXME: Traverser does not work with Id = void 
+//class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral > +class TestQuadrilateralMeshConfig : public DefaultConfig< Topologies::Quadrilateral, 2, double, int, int, int > +{ +public: + static constexpr bool entityStorage( int dimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); } + template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; } +}; + +// FIXME: Traverser does not work with Id = void +//class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron > +class TestHexahedronMeshConfig : public DefaultConfig< Topologies::Hexahedron, 3, double, int, int, int > +{ +public: + static constexpr bool entityStorage( int dimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityStorage( EntityTopology, int SubentityDimensions ) { return true; } + template< typename EntityTopology > static constexpr bool subentityOrientationStorage( EntityTopology, int SubentityDimensions ) { return ( SubentityDimensions % 2 != 0 ); } + template< typename EntityTopology > static constexpr bool superentityStorage( EntityTopology, int SuperentityDimensions ) { return true; } +}; + +struct TestEntitiesProcessor +{ + template< typename Mesh, typename UserData, typename Entity > + __cuda_callable__ + static void processEntity( const Mesh& mesh, UserData& userData, const Entity& entity ) + { + userData[ entity.getIndex() ] += 1; + } +}; + +template< typename EntityType, typename DeviceMeshPointer, typename HostArray > +void testCudaTraverser( const DeviceMeshPointer& deviceMeshPointer, + const HostArray& host_array_boundary, + const HostArray& 
host_array_interior, + const HostArray& host_array_all ) +{ + using MeshType = typename DeviceMeshPointer::ObjectType; + Traverser< MeshType, EntityType > traverser; + + Containers::Array< int, Devices::Cuda > array_boundary( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + Containers::Array< int, Devices::Cuda > array_interior( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + Containers::Array< int, Devices::Cuda > array_all ( deviceMeshPointer->template getEntitiesCount< EntityType >() ); + + array_boundary.setValue( 0 ); + array_interior.setValue( 0 ); + array_all .setValue( 0 ); + + traverser.template processBoundaryEntities< TestEntitiesProcessor >( deviceMeshPointer, array_boundary.getView() ); + traverser.template processInteriorEntities< TestEntitiesProcessor >( deviceMeshPointer, array_interior.getView() ); + traverser.template processAllEntities < TestEntitiesProcessor >( deviceMeshPointer, array_all.getView() ); + + EXPECT_EQ( array_boundary, host_array_boundary ); + EXPECT_EQ( array_interior, host_array_interior ); + EXPECT_EQ( array_all, host_array_all ); +} + +TEST( MeshTest, RegularMeshOfQuadrilateralsTest ) +{ + using QuadrilateralMeshEntityType = MeshEntity< TestQuadrilateralMeshConfig, Devices::Host, Topologies::Quadrilateral >; + using EdgeMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 1 >::SubentityType; + using VertexMeshEntityType = typename QuadrilateralMeshEntityType::SubentityTraits< 0 >::SubentityType; + + using PointType = typename VertexMeshEntityType::PointType; + static_assert( std::is_same< PointType, Containers::StaticVector< 2, RealType > >::value, + "unexpected PointType" ); + + const IndexType xSize( 3 ), ySize( 4 ); + const RealType width( 1.0 ), height( 1.0 ); + const RealType hx( width / ( RealType ) xSize ), + hy( height / ( RealType ) ySize ); + const IndexType numberOfCells = xSize * ySize; + const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ); + + using 
TestQuadrilateralMesh = Mesh< TestQuadrilateralMeshConfig >; + Pointers::SharedPointer< TestQuadrilateralMesh > meshPointer; + MeshBuilder< TestQuadrilateralMesh > meshBuilder; + meshBuilder.setPointsCount( numberOfVertices ); + meshBuilder.setCellsCount( numberOfCells ); + + /**** + * Setup vertices + */ + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + meshBuilder.setPoint( j * ( xSize + 1 ) + i, PointType( i * hx, j * hy ) ); + + /**** + * Setup cells + */ + IndexType cellIdx( 0 ); + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType vertex0 = j * ( xSize + 1 ) + i; + const IndexType vertex1 = j * ( xSize + 1 ) + i + 1; + const IndexType vertex2 = ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex3 = ( j + 1 ) * ( xSize + 1 ) + i; + + meshBuilder.getCellSeed( cellIdx ).setCornerId( 0, vertex0 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 1, vertex1 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 2, vertex2 ); + meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 3, vertex3 ); + } + + ASSERT_TRUE( meshBuilder.build( *meshPointer ) ); + + // traversers for all test cases + Traverser< TestQuadrilateralMesh, QuadrilateralMeshEntityType > traverser_cells; + Traverser< TestQuadrilateralMesh, EdgeMeshEntityType > traverser_edges; + Traverser< TestQuadrilateralMesh, VertexMeshEntityType > traverser_vertices; + + // arrays for all test cases + Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_cells_interior( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_cells_all ( meshPointer->template getEntitiesCount< 2 >() ); + + Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_all ( 
meshPointer->template getEntitiesCount< 1 >() ); + + Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_all ( meshPointer->template getEntitiesCount< 0 >() ); + + // reset all arrays + array_cells_boundary.setValue( 0 ); + array_cells_interior.setValue( 0 ); + array_cells_all .setValue( 0 ); + + array_edges_boundary.setValue( 0 ); + array_edges_interior.setValue( 0 ); + array_edges_all .setValue( 0 ); + + array_vertices_boundary.setValue( 0 ); + array_vertices_interior.setValue( 0 ); + array_vertices_all .setValue( 0 ); + + // traverse for all test cases + traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() ); + traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() ); + traverser_cells.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() ); + + traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_edges_boundary.getView() ); + traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() ); + traverser_edges.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() ); + + traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_vertices_boundary.getView() ); + traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() ); + traverser_vertices.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() ); + + // test traversing cells + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType idx 
= j * xSize + i; + if( j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) { + EXPECT_EQ( array_cells_boundary[ idx ], 1 ); + EXPECT_EQ( array_cells_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_cells_boundary[ idx ], 0 ); + EXPECT_EQ( array_cells_interior[ idx ], 1 ); + } + EXPECT_EQ( array_cells_all[ idx ], 1 ); + } + + // test traversing edges + // (edges are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) { + EXPECT_EQ( array_edges_boundary[ idx ], 1 ); + EXPECT_EQ( array_edges_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_edges_boundary[ idx ], 0 ); + EXPECT_EQ( array_edges_interior[ idx ], 1 ); + } + EXPECT_EQ( array_edges_all[ idx ], 1 ); + } + + // test traversing vertices + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + { + const IndexType idx = j * (xSize + 1) + i; + if( j == 0 || j == ySize || i == 0 || i == xSize ) { + EXPECT_EQ( array_vertices_boundary[ idx ], 1 ); + EXPECT_EQ( array_vertices_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_vertices_boundary[ idx ], 0 ); + EXPECT_EQ( array_vertices_interior[ idx ], 1 ); + } + EXPECT_EQ( array_vertices_all[ idx ], 1 ); + } + + // test traverser with CUDA +#ifdef HAVE_CUDA + using DeviceMesh = Mesh< TestQuadrilateralMeshConfig, Devices::Cuda >; + Pointers::SharedPointer< DeviceMesh > deviceMeshPointer; + *deviceMeshPointer = *meshPointer; + + testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all ); + testCudaTraverser< EdgeMeshEntityType >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all ); + testCudaTraverser< VertexMeshEntityType >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, array_vertices_all ); +#endif +} + +TEST( MeshTest, 
RegularMeshOfHexahedronsTest ) +{ + using HexahedronMeshEntityType = MeshEntity< TestHexahedronMeshConfig, Devices::Host, Topologies::Hexahedron >; + using QuadrilateralMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 2 >::SubentityType; + using EdgeMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 1 >::SubentityType; + using VertexMeshEntityType = typename HexahedronMeshEntityType::SubentityTraits< 0 >::SubentityType; + + using PointType = typename VertexMeshEntityType::PointType; + static_assert( std::is_same< PointType, Containers::StaticVector< 3, RealType > >::value, + "unexpected PointType" ); + + const IndexType xSize( 3 ), ySize( 4 ), zSize( 5 ); + const RealType width( 1.0 ), height( 1.0 ), depth( 1.0 ); + const RealType hx( width / ( RealType ) xSize ), + hy( height / ( RealType ) ySize ), + hz( depth / ( RealType ) zSize ); + const IndexType numberOfCells = xSize * ySize * zSize; + const IndexType numberOfVertices = ( xSize + 1 ) * ( ySize + 1 ) * ( zSize + 1 ); + + using TestHexahedronMesh = Mesh< TestHexahedronMeshConfig >; + Pointers::SharedPointer< TestHexahedronMesh > meshPointer; + MeshBuilder< TestHexahedronMesh > meshBuilder; + meshBuilder.setPointsCount( numberOfVertices ); + meshBuilder.setCellsCount( numberOfCells ); + + /**** + * Setup vertices + */ + for( IndexType k = 0; k <= zSize; k++ ) + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + meshBuilder.setPoint( k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i, PointType( i * hx, j * hy, k * hz ) ); + + /**** + * Setup cells + */ + IndexType cellIdx( 0 ); + for( IndexType k = 0; k < zSize; k++ ) + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType vertex0 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i; + const IndexType vertex1 = k * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1; + const IndexType vertex2 = k * ( xSize + 1 ) * ( ySize + 
1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex3 = k * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i; + const IndexType vertex4 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i; + const IndexType vertex5 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + j * ( xSize + 1 ) + i + 1; + const IndexType vertex6 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i + 1; + const IndexType vertex7 = ( k + 1 ) * ( xSize + 1 ) * ( ySize + 1 ) + ( j + 1 ) * ( xSize + 1 ) + i; + + meshBuilder.getCellSeed( cellIdx ).setCornerId( 0, vertex0 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 1, vertex1 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 2, vertex2 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 3, vertex3 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 4, vertex4 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 5, vertex5 ); + meshBuilder.getCellSeed( cellIdx ).setCornerId( 6, vertex6 ); + meshBuilder.getCellSeed( cellIdx++ ).setCornerId( 7, vertex7 ); + } + + ASSERT_TRUE( meshBuilder.build( *meshPointer ) ); + + // traversers for all test cases + Traverser< TestHexahedronMesh, HexahedronMeshEntityType > traverser_cells; + Traverser< TestHexahedronMesh, QuadrilateralMeshEntityType > traverser_faces; + Traverser< TestHexahedronMesh, EdgeMeshEntityType > traverser_edges; + Traverser< TestHexahedronMesh, VertexMeshEntityType > traverser_vertices; + + // arrays for all test cases + Containers::Array< int > array_cells_boundary( meshPointer->template getEntitiesCount< 3 >() ); + Containers::Array< int > array_cells_interior( meshPointer->template getEntitiesCount< 3 >() ); + Containers::Array< int > array_cells_all ( meshPointer->template getEntitiesCount< 3 >() ); + + Containers::Array< int > array_faces_boundary( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< int > array_faces_interior( meshPointer->template getEntitiesCount< 2 >() ); + Containers::Array< 
int > array_faces_all ( meshPointer->template getEntitiesCount< 2 >() ); + + Containers::Array< int > array_edges_boundary( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_interior( meshPointer->template getEntitiesCount< 1 >() ); + Containers::Array< int > array_edges_all ( meshPointer->template getEntitiesCount< 1 >() ); + + Containers::Array< int > array_vertices_boundary( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_interior( meshPointer->template getEntitiesCount< 0 >() ); + Containers::Array< int > array_vertices_all ( meshPointer->template getEntitiesCount< 0 >() ); + + // reset all arrays + array_cells_boundary.setValue( 0 ); + array_cells_interior.setValue( 0 ); + array_cells_all .setValue( 0 ); + + array_faces_boundary.setValue( 0 ); + array_faces_interior.setValue( 0 ); + array_faces_all .setValue( 0 ); + + array_edges_boundary.setValue( 0 ); + array_edges_interior.setValue( 0 ); + array_edges_all .setValue( 0 ); + + array_vertices_boundary.setValue( 0 ); + array_vertices_interior.setValue( 0 ); + array_vertices_all .setValue( 0 ); + + // traverse for all test cases + traverser_cells.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_cells_boundary.getView() ); + traverser_cells.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_cells_interior.getView() ); + traverser_cells.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_cells_all.getView() ); + + traverser_faces.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_faces_boundary.getView() ); + traverser_faces.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_faces_interior.getView() ); + traverser_faces.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_faces_all.getView() ); + + traverser_edges.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, 
array_edges_boundary.getView() ); + traverser_edges.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_edges_interior.getView() ); + traverser_edges.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_edges_all.getView() ); + + traverser_vertices.template processBoundaryEntities< TestEntitiesProcessor >( meshPointer, array_vertices_boundary.getView() ); + traverser_vertices.template processInteriorEntities< TestEntitiesProcessor >( meshPointer, array_vertices_interior.getView() ); + traverser_vertices.template processAllEntities < TestEntitiesProcessor >( meshPointer, array_vertices_all.getView() ); + + // test traversing cells + for( IndexType k = 0; k < zSize; k++ ) + for( IndexType j = 0; j < ySize; j++ ) + for( IndexType i = 0; i < xSize; i++ ) + { + const IndexType idx = k * xSize * ySize + j * xSize + i; + if( k == 0 || k == zSize - 1 || j == 0 || j == ySize - 1 || i == 0 || i == xSize - 1 ) { + EXPECT_EQ( array_cells_boundary[ idx ], 1 ); + EXPECT_EQ( array_cells_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_cells_boundary[ idx ], 0 ); + EXPECT_EQ( array_cells_interior[ idx ], 1 ); + } + EXPECT_EQ( array_cells_all[ idx ], 1 ); + } + + // test traversing faces + // (faces are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 2 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 2 >( idx ) ) { + EXPECT_EQ( array_faces_boundary[ idx ], 1 ); + EXPECT_EQ( array_faces_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_faces_boundary[ idx ], 0 ); + EXPECT_EQ( array_faces_interior[ idx ], 1 ); + } + EXPECT_EQ( array_faces_all[ idx ], 1 ); + } + + // test traversing edges + // (edges are not numbered systematically, so we just compare with isBoundaryEntity) + for( IndexType idx = 0; idx < meshPointer->template getEntitiesCount< 1 >(); idx++ ) + { + if( meshPointer->template isBoundaryEntity< 1 >( idx ) ) { 
+ EXPECT_EQ( array_edges_boundary[ idx ], 1 ); + EXPECT_EQ( array_edges_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_edges_boundary[ idx ], 0 ); + EXPECT_EQ( array_edges_interior[ idx ], 1 ); + } + EXPECT_EQ( array_edges_all[ idx ], 1 ); + } + + // test traversing vertices + for( IndexType k = 0; k <= zSize; k++ ) + for( IndexType j = 0; j <= ySize; j++ ) + for( IndexType i = 0; i <= xSize; i++ ) + { + const IndexType idx = k * (xSize + 1) * (ySize + 1) + j * (xSize + 1) + i; + if( k == 0 || k == zSize || j == 0 || j == ySize || i == 0 || i == xSize ) { + EXPECT_EQ( array_vertices_boundary[ idx ], 1 ); + EXPECT_EQ( array_vertices_interior[ idx ], 0 ); + } + else { + EXPECT_EQ( array_vertices_boundary[ idx ], 0 ); + EXPECT_EQ( array_vertices_interior[ idx ], 1 ); + } + EXPECT_EQ( array_vertices_all[ idx ], 1 ); + } + + // test traverser with CUDA +#ifdef HAVE_CUDA + using DeviceMesh = Mesh< TestHexahedronMeshConfig, Devices::Cuda >; + Pointers::SharedPointer< DeviceMesh > deviceMeshPointer; + *deviceMeshPointer = *meshPointer; + + testCudaTraverser< HexahedronMeshEntityType >( deviceMeshPointer, array_cells_boundary, array_cells_interior, array_cells_all ); + testCudaTraverser< QuadrilateralMeshEntityType >( deviceMeshPointer, array_faces_boundary, array_faces_interior, array_faces_all ); + testCudaTraverser< EdgeMeshEntityType >( deviceMeshPointer, array_edges_boundary, array_edges_interior, array_edges_all ); + testCudaTraverser< VertexMeshEntityType >( deviceMeshPointer, array_vertices_boundary, array_vertices_interior, array_vertices_all ); +#endif +} + +} // namespace MeshTest + +#endif diff --git a/src/UnitTests/Pointers/SharedPointerCudaTest.cu b/src/UnitTests/Pointers/SharedPointerCudaTest.cu index c0d76b2cc050d074831a4a6065d71b99ea24a7e9..83b6b4793bf6d5e17f6587b71c60261e8b80cea0 100644 --- a/src/UnitTests/Pointers/SharedPointerCudaTest.cu +++ b/src/UnitTests/Pointers/SharedPointerCudaTest.cu @@ -55,7 +55,7 @@ TEST( SharedPointerCudaTest, 
getDataTest ) ASSERT_EQ( ptr1->y(), 2 ); #else - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); TestType aux; @@ -89,7 +89,7 @@ TEST( SharedPointerCudaTest, getDataArrayTest ) ptr->setElement( 0, 1 ); ptr->setElement( 1, 2 ); - Devices::Cuda::synchronizeDevice(); + Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); int *testArray_device, *testArray_host; cudaMalloc( ( void** ) &testArray_device, 2 * sizeof( int ) ); diff --git a/src/UnitTests/TypeInfoTest.cpp b/src/UnitTests/TypeInfoTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c47cdffc819c27f08fc62e0b8c01110b888d0bfd --- /dev/null +++ b/src/UnitTests/TypeInfoTest.cpp @@ -0,0 +1,165 @@ +/*************************************************************************** + TypeInfoTest.cpp - description + ------------------- + begin : Aug 20, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/TypeInfo.h> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> +#endif + +using namespace TNL; + +#ifdef HAVE_GTEST + +enum MyEnumType { foo, bar }; +enum class MyEnumClass { foo, bar }; + +class MyClass {}; +class MyClassWithGetSerializationType +{ +public: + static std::string getSerializationType() { return "SomethingElse"; } +}; + +template< typename... 
> +class MyClassTemplate {}; + +class MyPolymorphicBase +{ +public: + virtual ~MyPolymorphicBase() {} +}; +class MyPolymorphicDerived : public MyPolymorphicBase +{ +public: + virtual ~MyPolymorphicDerived() {} +}; + + +TEST( TypeInfoTest, getType ) +{ + // non-const variants + EXPECT_EQ( getType< void >(), std::string( "void" ) ); + EXPECT_EQ( getType< bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getType< char >(), std::string( "char" ) ); + EXPECT_EQ( getType< short >(), std::string( "short" ) ); + EXPECT_EQ( getType< int >(), std::string( "int" ) ); + EXPECT_EQ( getType< long >(), std::string( "long" ) ); + + EXPECT_EQ( getType< unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getType< unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getType< unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getType< unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getType< signed char >(), std::string( "signed char" ) ); + + EXPECT_EQ( getType< float >(), std::string( "float" ) ); + EXPECT_EQ( getType< double >(), std::string( "double" ) ); + EXPECT_EQ( getType< long double >(), std::string( "long double" ) ); + + // const variants - top-level cv-qualifiers are ignored + EXPECT_EQ( getType< const void >(), std::string( "void" ) ); + EXPECT_EQ( getType< const bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getType< const char >(), std::string( "char" ) ); + EXPECT_EQ( getType< const short >(), std::string( "short" ) ); + EXPECT_EQ( getType< const int >(), std::string( "int" ) ); + EXPECT_EQ( getType< const long >(), std::string( "long" ) ); + + EXPECT_EQ( getType< const unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getType< const unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getType< const unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getType< const unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getType< const signed char 
>(), std::string( "signed char" ) ); + + EXPECT_EQ( getType< const float >(), std::string( "float" ) ); + EXPECT_EQ( getType< const double >(), std::string( "double" ) ); + EXPECT_EQ( getType< const long double >(), std::string( "long double" ) ); + + // enum types + EXPECT_EQ( getType< MyEnumType >(), std::string( "MyEnumType" ) ); + EXPECT_EQ( getType< MyEnumClass >(), std::string( "MyEnumClass" ) ); + + // classes + EXPECT_EQ( getType< MyClass >(), std::string( "MyClass" ) ); + EXPECT_EQ( getType< MyClassWithGetSerializationType >(), std::string( "MyClassWithGetSerializationType" ) ); + + // class templates + using T1 = MyClassTemplate< int, MyClassTemplate< int, int >, MyClass >; + EXPECT_EQ( getType< T1 >(), std::string( "MyClassTemplate<int, MyClassTemplate<int, int>, MyClass>" ) ); + + // polymorphic base + MyPolymorphicDerived obj; + MyPolymorphicBase* ptr = &obj; + // no dynamic cast for pointer types + EXPECT_EQ( getType( ptr ), std::string( "MyPolymorphicBase*" ) ); + // reference to a polymorphic object gets dynamic cast + EXPECT_EQ( getType( *ptr ), std::string( "MyPolymorphicDerived" ) ); +} + +TEST( TypeInfoTest, getSerializationType ) +{ + // non-const variants + EXPECT_EQ( getSerializationType< void >(), std::string( "void" ) ); + EXPECT_EQ( getSerializationType< bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getSerializationType< char >(), std::string( "char" ) ); + EXPECT_EQ( getSerializationType< short >(), std::string( "short" ) ); + EXPECT_EQ( getSerializationType< int >(), std::string( "int" ) ); + EXPECT_EQ( getSerializationType< long >(), std::string( "long" ) ); + + EXPECT_EQ( getSerializationType< unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getSerializationType< unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getSerializationType< unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getSerializationType< unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( 
getSerializationType< signed char >(), std::string( "signed char" ) ); + + EXPECT_EQ( getSerializationType< float >(), std::string( "float" ) ); + EXPECT_EQ( getSerializationType< double >(), std::string( "double" ) ); + EXPECT_EQ( getSerializationType< long double >(), std::string( "long double" ) ); + + // const variants - top-level cv-qualifiers are ignored + EXPECT_EQ( getSerializationType< const void >(), std::string( "void" ) ); + EXPECT_EQ( getSerializationType< const bool >(), std::string( "bool" ) ); + + EXPECT_EQ( getSerializationType< const char >(), std::string( "char" ) ); + EXPECT_EQ( getSerializationType< const short >(), std::string( "short" ) ); + EXPECT_EQ( getSerializationType< const int >(), std::string( "int" ) ); + EXPECT_EQ( getSerializationType< const long >(), std::string( "long" ) ); + + EXPECT_EQ( getSerializationType< const unsigned char >(), std::string( "unsigned char" ) ); + EXPECT_EQ( getSerializationType< const unsigned short >(), std::string( "unsigned short" ) ); + EXPECT_EQ( getSerializationType< const unsigned int >(), std::string( "unsigned int" ) ); + EXPECT_EQ( getSerializationType< const unsigned long >(), std::string( "unsigned long" ) ); + + EXPECT_EQ( getSerializationType< const signed char >(), std::string( "signed char" ) ); + + EXPECT_EQ( getSerializationType< const float >(), std::string( "float" ) ); + EXPECT_EQ( getSerializationType< const double >(), std::string( "double" ) ); + EXPECT_EQ( getSerializationType< const long double >(), std::string( "long double" ) ); + + // enum types + EXPECT_EQ( getSerializationType< MyEnumType >(), std::string( "MyEnumType" ) ); + EXPECT_EQ( getSerializationType< MyEnumClass >(), std::string( "MyEnumClass" ) ); + + // classes + EXPECT_EQ( getSerializationType< MyClass >(), std::string( "MyClass" ) ); + EXPECT_EQ( getSerializationType< MyClassWithGetSerializationType >(), std::string( "SomethingElse" ) ); + + // class templates + using T1 = MyClassTemplate< int, MyClassTemplate< 
int, int >, MyClass >; + EXPECT_EQ( getSerializationType< T1 >(), "MyClassTemplate<int, MyClassTemplate<int, int>, MyClass>" ); +} +#endif + +#include "main.h"