...
 
@@ -21,7 +21,6 @@ stages:
   WITH_OPENMP: "no"
   WITH_CUDA: "no"
   WITH_CUDA_ARCH: "auto"
-  WITH_MIC: "no"
  WITH_MPI: "no"
   # configurations
   WITH_TESTS: "no"
@@ -46,6 +45,8 @@ stages:
       fi
    - export CTEST_OUTPUT_ON_FAILURE=1
    - export CTEST_PARALLEL_LEVEL=4
+   # enforce (more or less) warning-free builds
+   - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
    - mkdir -p "./builddir/$CI_JOB_NAME"
    - pushd "./builddir/$CI_JOB_NAME"
    - cmake ../..
@@ -56,7 +57,6 @@ stages:
            -DWITH_MPI=${WITH_MPI}
            -DWITH_CUDA=${WITH_CUDA}
            -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-           -DWITH_MIC=${WITH_MIC}
            -DWITH_TESTS=${WITH_TESTS}
            -DWITH_DOC=${WITH_DOC}
            -DWITH_COVERAGE=${WITH_COVERAGE}
...
@@ -17,7 +17,6 @@ set( tnlVersion "0.1" )
 # declare all custom build options
 option(OFFLINE_BUILD "Offline build (i.e. without downloading libraries such as pybind11)" OFF)
-option(WITH_MIC "Build with MIC support" OFF)
 option(WITH_CUDA "Build with CUDA support" ON)
 set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures")
 option(WITH_OPENMP "Build with OpenMP support" ON)
@@ -83,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
 set( CMAKE_CXX_EXTENSIONS OFF )
 # set Debug/Release options
-set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
+set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
 set( CMAKE_CXX_FLAGS_DEBUG "-g" )
 set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" )
 # pass -rdynamic only in Debug mode
@@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} )
    endif()
 endif()
-if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
-   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570")
-   #####
-   # Check for MIC
-   #
-   if( ${WITH_MIC} )
-      message( "Enabled MIC support." )
-      set( MIC_CXX_FLAGS "-DHAVE_MIC")
-      # build all tests with MIC support
-      set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC )
-      set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" )
-   else()
-      set( MIC_CXX_FLAGS "")
-   endif()
-endif()
 # force colorized output in continuous integration
 if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
    message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
@@ -355,7 +338,6 @@ INCLUDE( CPack )
 # Print custom build options
 message( "-- Build options:" )
 message( " OFFLINE_BUILD = ${OFFLINE_BUILD}" )
-message( " WITH_MIC = ${WITH_MIC}" )
 message( " WITH_CUDA = ${WITH_CUDA}" )
 message( " WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" )
 message( " WITH_OPENMP = ${WITH_OPENMP}" )
...
@@ -17,7 +17,7 @@ int main()
     */
    File file;
    file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc );
-   file.save< double, double, Devices::Host >( doubleArray, size );
+   file.save< double, double, Allocators::Host< double > >( doubleArray, size );
    file.close();
    /***
@@ -31,7 +31,7 @@ int main()
     * Read array from the file to device
     */
    file.open( "file-example-cuda-test-file.tnl", std::ios_base::in );
-   file.load< double, double, Devices::Cuda >( deviceArray, size );
+   file.load< double, double, Allocators::Cuda< double > >( deviceArray, size );
    file.close();
    /***
...
@@ -18,21 +18,21 @@ int main()
     */
    File file;
    file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc );
-   file.save< double, float, Devices::Host >( doubleArray, size );
+   file.save< double, float >( doubleArray, size );
    file.close();
    /***
     * Load the array of floats from the file.
     */
    file.open( "test-file.tnl", std::ios_base::in );
-   file.load< float, float, Devices::Host >( floatArray, size );
+   file.load< float, float >( floatArray, size );
    file.close();
    /***
     * Load the array of floats from the file and convert them to integers.
     */
    file.open( "test-file.tnl", std::ios_base::in );
-   file.load< int, float, Devices::Host >( intArray, size );
+   file.load< int, float >( intArray, size );
    file.close();
    /***
...
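Aside: a minimal sketch of the simplified File API after this change, not part of the diff. It assumes the semantics visible in the hunk above -- the first template parameter is the element type in memory, the second the type stored in the file; the filename is illustrative.

#include <iostream>
#include <TNL/File.h>

using namespace TNL;

int main()
{
   const int size = 3;
   double doubleArray[ size ] = { 1.0, 2.0, 3.0 };
   float floatArray[ size ];

   File file;
   // store the doubles as floats -- the conversion happens during save
   file.open( "sketch-test-file.tnl", std::ios_base::out | std::ios_base::trunc );
   file.save< double, float >( doubleArray, size );
   file.close();

   // read the floats back; load< int, float > would additionally convert to int
   file.open( "sketch-test-file.tnl", std::ios_base::in );
   file.load< float, float >( floatArray, size );
   file.close();

   for( int i = 0; i < size; i++ )
      std::cout << floatArray[ i ] << " ";
   std::cout << std::endl;
}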
 #include <iostream>
-#include <TNL/param-types.h>
+#include <TNL/TypeInfo.h>
 #include <TNL/Object.h>
 #include <TNL/Devices/Host.h>
 #include <TNL/Devices/Cuda.h>
@@ -13,24 +13,12 @@ class MyArray : public Object
 {
 public:
-   using HostType = MyArray< Value, Devices::Host >;
-   static String getType()
-   {
-      return "MyArray< " + TNL::getType< Value >() + ", " + TNL::getType< Device >() + " >";
-   }
-   String getTypeVirtual() const
-   {
-      return getType();
-   }
    static String getSerializationType()
    {
-      return HostType::getType();
+      return "MyArray< " + TNL::getType< Value >() + ", " + getType< Devices::Host >() + " >";
    }
-   String getSerializationTypeVirtual() const
+   virtual String getSerializationTypeVirtual() const override
    {
       return getSerializationType();
    }
@@ -47,11 +35,11 @@ int main()
    Object* cudaArrayPtr = &cudaArray;
    // Object types
-   cout << "HostArray type is " << HostArray::getType() << endl;
-   cout << "hostArrayPtr type is " << hostArrayPtr->getTypeVirtual() << endl;
-   cout << "CudaArray type is " << CudaArray::getType() << endl;
-   cout << "cudaArrayPtr type is " << cudaArrayPtr->getTypeVirtual() << endl;
+   cout << "HostArray type is " << getType< HostArray >() << endl;
+   cout << "hostArrayPtr type is " << getType( *hostArrayPtr ) << endl;
+   cout << "CudaArray type is " << getType< CudaArray >() << endl;
+   cout << "cudaArrayPtr type is " << getType( *cudaArrayPtr ) << endl;
    // Object serialization types
    cout << "HostArray serialization type is " << HostArray::getSerializationType() << endl;
@@ -60,4 +48,3 @@ int main()
    cout << "CudaArray serialization type is " << CudaArray::getSerializationType() << endl;
    cout << "cudaArrayPtr serialization type is " << cudaArrayPtr->getSerializationTypeVirtual() << endl;
 }
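Aside: a short sketch, not part of the diff, of the free-standing TNL::getType that replaces the hand-written static getType() methods removed above. The template overload is the one used in the hunk; getType( obj ) is its run-time counterpart for polymorphic objects.

#include <iostream>
#include <TNL/TypeInfo.h>
#include <TNL/Devices/Host.h>

using namespace TNL;

int main()
{
   // getType< T >() returns a human-readable string naming the type T
   std::cout << getType< double >() << std::endl;
   std::cout << getType< Devices::Host >() << std::endl;
}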
 #include <iostream>
 #include <TNL/String.h>
-#include <TNL/Containers/List.h>
 #include <TNL/File.h>
 using namespace TNL;
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v )
...
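Aside: the example changes below (and the CommonVectorOperations hunks at the end) only move Reduction from TNL::Containers::Algorithms to TNL::Algorithms; the interface itself is unchanged. For context, a minimal sketch of that interface, with the argument order (size, reduction, fetch, identity) taken from the CommonVectorOperations hunks; the setValue fill is illustrative.

#include <iostream>
#include <functional>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/Reduction.h>

using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;

template< typename Device >
double sum( const Vector< double, Device >& v )
{
   const double* data = v.getData();
   // fetch reads the i-th element; std::plus combines partial results; 0.0 is the identity
   auto fetch = [=] __cuda_callable__ ( int i ) -> double { return data[ i ]; };
   return Reduction< Device >::reduce( v.getSize(), std::plus<>{}, fetch, 0.0 );
}

int main()
{
   Vector< double, Devices::Host > v( 10 );
   v.setValue( 1.0 );
   std::cout << sum( v ) << std::endl;   // prints 10
}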
@@ -4,7 +4,7 @@
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 void scan( Vector< double, Device >& v )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include <TNL/Timer.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include <TNL/Timer.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double mapReduce( Vector< double, Device >& u )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double maximumNorm( const Vector< double, Device >& v )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double product( const Vector< double, Device >& v )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 std::pair< int, double >
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double scalarProduct( const Vector< double, Device >& u, const Vector< double, Device >& v )
...
@@ -4,7 +4,7 @@
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 void scan( Vector< double, Device >& v )
...
@@ -4,7 +4,7 @@
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double sum( const Vector< double, Device >& v )
...
 #include <iostream>
 #include <cstdlib>
 #include <TNL/Containers/Vector.h>
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Containers::Algorithms;
+using namespace TNL::Algorithms;
 template< typename Device >
 double updateAndResidue( Vector< double, Device >& u, const Vector< double, Device >& delta_u, const double& tau )
...
@@ -24,7 +24,6 @@ void expressions()
    b.evaluate( [] __cuda_callable__ ( int i )->RealType { return i - 5.0; } );
    c = -5;
-   int arg;
    std::cout << "a = " << a << std::endl;
    std::cout << "b = " << b << std::endl;
    std::cout << "c = " << c << std::endl;
...
@@ -12,13 +12,20 @@ Similarly to the STL, features provided by the TNL can be grouped into
 several modules:
 - _Core concepts_.
-  The main concept used in the TNL is the `Device` type which is used in most of
-  the other parts of the library. For data structures such as `Array` it
-  specifies where the data should be allocated, whereas for algorithms such as
-  `ParallelFor` it specifies how the algorithm should be executed.
+  The main concepts used in TNL are the _memory space_, which represents the
+  part of memory where given data is allocated, and the _execution model_,
+  which represents the way how given (typically parallel) algorithm is executed.
+  For example, data can be allocated in the main system memory, in the GPU
+  memory, or using the CUDA Unified Memory which can be accessed from the host
+  as well as from the GPU. On the other hand, algorithms can be executed using
+  either the host CPU or an accelerator (GPU), and for each there are many ways
+  to manage parallel execution. The usage of memory spaces is abstracted with
+  [allocators][allocators] and the execution model is represented by
+  [devices][devices]. See the [Core concepts][core concepts] page for details.
 - _[Containers][containers]_.
   TNL provides generic containers such as array, multidimensional array or array
-  views, which abstract data management on different hardware architectures.
+  views, which abstract data management and execution of common operations on
+  different hardware architectures.
 - _Linear algebra._
   TNL provides generic data structures and algorithms for linear algebra, such
   as [vectors][vectors], [sparse matrices][matrices],
@@ -39,6 +46,9 @@ several modules:
   [libpng](http://www.libpng.org/pub/png/libpng.html) for PNG files, or
   [libjpeg](http://libjpeg.sourceforge.net/) for JPEG files.
+[allocators]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Allocators.html
+[devices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Devices.html
+[core concepts]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/core_concepts.html
 [containers]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Containers.html
 [vectors]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/classTNL_1_1Containers_1_1Vector.html
 [matrices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Matrices.html
...
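Aside: a compact sketch, not part of the merge request, of the allocator/device split described in the documentation hunk above -- the device parameter of a container selects where its data lives (via the device's default allocator), while the device parameter of an algorithm selects how it executes. Array and ParallelFor are the names used by the tutorial text; the header path <TNL/ParallelFor.h> is assumed for this revision.

#include <iostream>
#include <TNL/Containers/Array.h>
#include <TNL/ParallelFor.h>

using namespace TNL;
using namespace TNL::Containers;

int main()
{
   // memory space: allocate 100 doubles in main system memory (host allocator)
   Array< double, Devices::Host > a( 100 );
   double* data = a.getData();

   // execution model: run the initialization loop in parallel on the host CPU;
   // with Devices::Cuda the loop would be offloaded to the GPU (the lambda
   // would then also need the __cuda_callable__ attribute)
   auto init = [=] ( int i ) { data[ i ] = i * i; };
   ParallelFor< Devices::Host >::exec( 0, a.getSize(), init );

   std::cout << a.getElement( 99 ) << std::endl;   // prints 9801
}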
@@ -48,7 +48,6 @@ do
       --offline-build ) OFFLINE_BUILD="yes" ;;
       --with-clang=* ) WITH_CLANG="${option#*=}" ;;
       --with-mpi=* ) WITH_MPI="${option#*=}" ;;
-      --with-mic=* ) WITH_MIC="${option#*=}" ;;
       --with-cuda=* ) WITH_CUDA="${option#*=}" ;;
       --with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";;
       --with-openmp=* ) WITH_OPENMP="${option#*=}" ;;
@@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then
    echo "   --install=yes/no                       Enables the installation of TNL files."
    echo "   --offline-build=yes/no                 Disables online updates during the build. 'no' by default."
    echo "   --with-mpi=yes/no                      Enables MPI. 'yes' by default (OpenMPI required)."
-   echo "   --with-mic=yes/no                      Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
    echo "   --with-cuda=yes/no                     Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
    echo "   --with-cuda-arch=all/auto/3.0/3.5/...  Chooses CUDA architecture. 'auto' by default."
    echo "   --with-openmp=yes/no                   Enables OpenMP. 'yes' by default."
@@ -110,7 +108,12 @@ else
    export CC=gcc
 fi
-if hash ninja 2>/dev/null; then
+if [[ ! $(command -v cmake) ]]; then
+   echo "Error: cmake is not installed. See http://www.cmake.org/download/" >&2
+   exit 1
+fi
+if [[ $(command -v ninja) ]]; then
    generator=Ninja
    make=ninja
    check_file="build.ninja"
@@ -126,7 +129,6 @@ cmake_command=(
    -DCMAKE_BUILD_TYPE=${BUILD}
    -DCMAKE_INSTALL_PREFIX=${PREFIX}
    -DOFFLINE_BUILD=${OFFLINE_BUILD}
-   -DWITH_MIC=${WITH_MIC}
    -DWITH_CUDA=${WITH_CUDA}
    -DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
    -DWITH_OPENMP=${WITH_OPENMP}
...
 #!/bin/bash
+set -e
 BUILD_DEBUG="yes"
 BUILD_RELEASE="yes"
 OPTIONS=""
-CMAKE_TEST=`which cmake`
-if test x${CMAKE_TEST} = "x";
-then
-   echo "Cmake is not installed on your system. Please install it by:"
-   echo ""
-   echo "   sudo apt-get install cmake     on Ubuntu and Debian based systems"
-   echo "   sudo yum install cmake         on RedHat, Fedora or CentOS"
-   echo "   sudo zypper install cmake      on OpenSuse"
-   echo ""
-   echo "You may also install it from the source code at:"
-   echo "   http://www.cmake.org/download/"
-   exit 1
-fi
-for option in "$@"
-do
-   case $option in
-      --no-debug ) BUILD_DEBUG="no" ;;
-      --no-release ) BUILD_RELEASE="no" ;;
-      * ) OPTIONS="${OPTIONS} ${option}" ;;
+for option in "$@"; do
+   case $option in
+      --no-debug)
+         BUILD_DEBUG="no"
+         ;;
+      --no-release)
+         BUILD_RELEASE="no"
+         ;;
+      --build=* )
+         BUILD="${option#*=}"
+         if [[ "$BUILD" != "Release" ]]; then
+            BUILD_RELEASE="no"
+         fi
+         if [[ "$BUILD" != "Debug" ]]; then
+            BUILD_DEBUG="no"
+         fi
+         ;;
+      *)
+         OPTIONS="${OPTIONS} ${option}"
+         ;;
    esac
 done
-if test ${BUILD_DEBUG} = "yes";
-then
-   if [ ! -d Debug ];
-   then
-      mkdir Debug
-   fi
-   cd Debug
-   if ! ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}
-   then
-      exit 1
-   fi
-   cd ..
+if [[ ${BUILD_DEBUG} == "yes" ]]; then
+   if [[ ! -d Debug ]]; then
+      mkdir Debug
+   fi
+   pushd Debug
+   ../build --root-dir=.. --build=Debug --install=yes ${OPTIONS}
+   popd
 fi
-if test ${BUILD_RELEASE} = "yes";
-then
-   if [ ! -d Release ];
-   then
-      mkdir Release
-   fi
-   cd Release
-   if ! ../build --root-dir=.. --build=Release --install=yes ${OPTIONS};
-   then
-      exit 1
-   fi
-   cd ..
+if [[ ${BUILD_RELEASE} == "yes" ]]; then
+   if [[ ! -d Release ]]; then
+      mkdir Release
+   fi
+   pushd Release
+   ../build --root-dir=.. --build=Release --install=yes ${OPTIONS};
+   popd
 fi
...
@@ -10,7 +10,7 @@
 #pragma once
-#include <TNL/Containers/Algorithms/Reduction.h>
+#include <TNL/Algorithms/Reduction.h>
 #include "CommonVectorOperations.h"
 namespace TNL {
@@ -30,7 +30,7 @@ getVectorMax( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 template< typename Device >
@@ -47,7 +47,7 @@ getVectorMin( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> RealType { return data[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 template< typename Device >
@@ -64,7 +64,7 @@ getVectorAbsMax( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 template< typename Device >
@@ -81,7 +81,7 @@ getVectorAbsMin( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 template< typename Device >
@@ -97,7 +97,7 @@ getVectorL1Norm( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data[ i ] ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 template< typename Device >
@@ -113,7 +113,7 @@ getVectorL2Norm( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data[ i ] * data[ i ]; };
-   return std::sqrt( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
+   return std::sqrt( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ) );
 }
 template< typename Device >
@@ -136,7 +136,7 @@ getVectorLpNorm( const Vector& v,
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::pow( TNL::abs( data[ i ] ), p ); };
-   return std::pow( Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
+   return std::pow( Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 ), 1.0 / p );
 }
 template< typename Device >
@@ -155,7 +155,7 @@ getVectorSum( const Vector& v )
    const auto* data = v.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
+   return Algorithms::Reduction< DeviceType >::reduce( v.getSize(), std::plus<>{}, fetch, ( ResultType ) 0 );
 }
 template< typename Device >
@@ -175,7 +175,7 @@ getVectorDifferenceMax( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 template< typename Device >
@@ -195,7 +195,7 @@ getVectorDifferenceMin( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return data1[ i ] - data2[ i ]; };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 template< typename Device >
@@ -215,7 +215,7 @@ getVectorDifferenceAbsMax( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );
 }
 template< typename Device >
@@ -235,7 +235,7 @@ getVectorDifferenceAbsMin( const Vector1& v1,
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };
    auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
-   return Containers::Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
+   return Algorithms::Reduction< DeviceType >::reduce( v1.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
 }
 template< typename Device >
@@ -254,7 +254,7 @@ getVectorDifferenceL1Norm( const Vector1& v1,
    const auto* data1 = v1.getData();
    const auto* data2 = v2.getData();
    auto fetch = [=] __cuda_callable__ ( IndexType i ) { return TNL::abs( data1[ i ] - data2[ i ] ); };