Commit 6f736e10 authored by Jakub Klinkovský

Merge branch 'JK/execution' into 'develop'

Refactoring for execution policies

Closes #49, #46, and #11

See merge request !42
parents ccd42739 9723c16b
@@ -21,7 +21,6 @@ stages:
WITH_OPENMP: "no"
WITH_CUDA: "no"
WITH_CUDA_ARCH: "auto"
WITH_MIC: "no"
WITH_MPI: "no"
# configurations
WITH_TESTS: "no"
@@ -46,6 +45,8 @@ stages:
fi
- export CTEST_OUTPUT_ON_FAILURE=1
- export CTEST_PARALLEL_LEVEL=4
# enforce (more or less) warning-free builds
- export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla"
- mkdir -p "./builddir/$CI_JOB_NAME"
- pushd "./builddir/$CI_JOB_NAME"
- cmake ../..
@@ -56,7 +57,6 @@ stages:
-DWITH_MPI=${WITH_MPI}
-DWITH_CUDA=${WITH_CUDA}
-DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-DWITH_MIC=${WITH_MIC}
-DWITH_TESTS=${WITH_TESTS}
-DWITH_DOC=${WITH_DOC}
-DWITH_COVERAGE=${WITH_COVERAGE}
......
@@ -17,7 +17,6 @@ set( tnlVersion "0.1" )
# declare all custom build options
option(OFFLINE_BUILD "Offline build (i.e. without downloading libraries such as pybind11)" OFF)
option(WITH_MIC "Build with MIC support" OFF)
option(WITH_CUDA "Build with CUDA support" ON)
set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures")
option(WITH_OPENMP "Build with OpenMP support" ON)
@@ -83,7 +82,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON )
set( CMAKE_CXX_EXTENSIONS OFF )
# set Debug/Release options
set( CMAKE_CXX_FLAGS "-pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" )
set( CMAKE_CXX_FLAGS_DEBUG "-g" )
set( CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG" )
# pass -rdynamic only in Debug mode
@@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} )
endif()
endif()
if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570")
#####
# Check for MIC
#
if( ${WITH_MIC} )
message( "Enabled MIC support." )
set( MIC_CXX_FLAGS "-DHAVE_MIC")
# build all tests with MIC support
set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC )
set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" )
else()
set( MIC_CXX_FLAGS "")
endif()
endif()
# force colorized output in continuous integration
if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
@@ -355,7 +338,6 @@ INCLUDE( CPack )
# Print custom build options
message( "-- Build options:" )
message( " OFFLINE_BUILD = ${OFFLINE_BUILD}" )
message( " WITH_MIC = ${WITH_MIC}" )
message( " WITH_CUDA = ${WITH_CUDA}" )
message( " WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" )
message( " WITH_OPENMP = ${WITH_OPENMP}" )
......
@@ -17,7 +17,7 @@ int main()
*/
File file;
file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc );
file.save< double, double, Devices::Host >( doubleArray, size );
file.save< double, double, Allocators::Host< double > >( doubleArray, size );
file.close();
/***
@@ -31,7 +31,7 @@ int main()
* Read array from the file to device
*/
file.open( "file-example-cuda-test-file.tnl", std::ios_base::in );
file.load< double, double, Devices::Cuda >( deviceArray, size );
file.load< double, double, Allocators::Cuda< double > >( deviceArray, size );
file.close();
/***
......
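For reference, the diff above ties `File` serialization to allocators instead of devices: the third template parameter of `File::save` and `File::load` now names where the source or target buffer lives. Below is a minimal host-only sketch of the new calling convention; the calls mirror the example above, but the header path `TNL/Allocators/Host.h` and the buffer names are assumptions.

```cpp
#include <TNL/File.h>
#include <TNL/Allocators/Host.h>  // assumed header path for Allocators::Host

using namespace TNL;

int main()
{
   const int size = 3;
   double data[ size ] = { 1.0, 2.0, 3.0 };
   double loaded[ size ];

   File file;
   // the third template parameter is now an allocator: it tells File where
   // the buffer lives (host memory here; Allocators::Cuda< double > for GPU
   // memory, as in the example above)
   file.open( "allocator-sketch.tnl", std::ios_base::out | std::ios_base::trunc );
   file.save< double, double, Allocators::Host< double > >( data, size );
   file.close();

   file.open( "allocator-sketch.tnl", std::ios_base::in );
   file.load< double, double, Allocators::Host< double > >( loaded, size );
   file.close();
}
```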
@@ -18,21 +18,21 @@ int main()
*/
File file;
file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc );
file.save< double, float, Devices::Host >( doubleArray, size );
file.save< double, float >( doubleArray, size );
file.close();
/***
* Load the array of floats from the file.
*/
file.open( "test-file.tnl", std::ios_base::in );
file.load< float, float, Devices::Host >( floatArray, size );
file.load< float, float >( floatArray, size );
file.close();
/***
* Load the array of floats from the file and convert them to integers.
*/
file.open( "test-file.tnl", std::ios_base::in );
file.load< int, float, Devices::Host >( intArray, size );
file.load< int, float >( intArray, size );
file.close();
/***
......
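In this hunk the allocator parameter is dropped entirely, so it presumably defaults to a host allocator; only the in-memory element type and the on-file element type remain, and the two may differ, with conversion performed during I/O. A minimal sketch built from the calls shown above (file and array names are hypothetical):

```cpp
#include <TNL/File.h>

using namespace TNL;

int main()
{
   const int size = 3;
   double doubleArray[ size ] = { 3.14, 2.72, 1.41 };
   int intArray[ size ];

   File file;
   // save< InMemoryType, OnFileType >: doubles are stored as floats on disk
   file.open( "conversion-sketch.tnl", std::ios_base::out | std::ios_base::trunc );
   file.save< double, float >( doubleArray, size );
   file.close();

   // load< InMemoryType, OnFileType >: floats on disk are converted to ints
   file.open( "conversion-sketch.tnl", std::ios_base::in );
   file.load< int, float >( intArray, size );
   file.close();
}
```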
#include <iostream>
#include <TNL/param-types.h>
#include <TNL/TypeInfo.h>
#include <TNL/Object.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
@@ -13,24 +13,12 @@ class MyArray : public Object
{
public:
using HostType = MyArray< Value, Devices::Host >;
static String getType()
{
return "MyArray< " + TNL::getType< Value >() + ", " + TNL::getType< Device >() + " >";
}
String getTypeVirtual() const
{
return getType();
}
static String getSerializationType()
{
return HostType::getType();
return "MyArray< " + TNL::getType< Value >() + ", " + getType< Devices::Host >() + " >";
}
String getSerializationTypeVirtual() const
virtual String getSerializationTypeVirtual() const override
{
return getSerializationType();
}
@@ -47,11 +35,11 @@ int main()
Object* cudaArrayPtr = &cudaArray;
// Object types
cout << "HostArray type is " << HostArray::getType() << endl;
cout << "hostArrayPtr type is " << hostArrayPtr->getTypeVirtual() << endl;
cout << "HostArray type is " << getType< HostArray >() << endl;
cout << "hostArrayPtr type is " << getType( *hostArrayPtr ) << endl;
cout << "CudaArray type is " << CudaArray::getType() << endl;
cout << "cudaArrayPtr type is " << cudaArrayPtr->getTypeVirtual() << endl;
cout << "CudaArray type is " << getType< CudaArray >() << endl;
cout << "cudaArrayPtr type is " << getType( *cudaArrayPtr ) << endl;
// Object serialization types
cout << "HostArray serialization type is " << HostArray::getSerializationType() << endl;
@@ -60,4 +48,3 @@ int main()
cout << "CudaArray serialization type is " << CudaArray::getSerializationType() << endl;
cout << "cudaArrayPtr serialization type is " << cudaArrayPtr->getSerializationTypeVirtual() << endl;
}
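The pattern in this diff generalizes: the static `getType()` methods are replaced by free functions from `<TNL/TypeInfo.h>`, where `getType< T >()` resolves the type at compile time and `getType( obj )` recovers the dynamic type of an object, as used on `*hostArrayPtr` above. A sketch with plain types, assuming the dynamic overload accepts any polymorphic class rather than only `Object` subclasses (the class names are hypothetical):

```cpp
#include <iostream>
#include <TNL/TypeInfo.h>

struct Base { virtual ~Base() = default; };
struct Derived : Base {};

int main()
{
   Derived d;
   Base* ptr = &d;
   // static variant: the type is fixed at compile time
   std::cout << TNL::getType< Derived >() << std::endl;
   // dynamic variant: the real type is recovered through RTTI,
   // so this prints the Derived type even through a Base pointer
   std::cout << TNL::getType( *ptr ) << std::endl;
}
```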
#include <iostream>
#include <TNL/String.h>
#include <TNL/Containers/List.h>
#include <TNL/File.h>
using namespace TNL;
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v )
......
@@ -4,7 +4,7 @@
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
void scan( Vector< double, Device >& v )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double mapReduce( Vector< double, Device >& u )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
#include <TNL/Timer.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double mapReduce( Vector< double, Device >& u )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
#include <TNL/Timer.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double mapReduce( Vector< double, Device >& u )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double maximumNorm( const Vector< double, Device >& v )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double product( const Vector< double, Device >& v )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
std::pair< int, double >
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double scalarProduct( const Vector< double, Device >& u, const Vector< double, Device >& v )
......
@@ -4,7 +4,7 @@
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
void scan( Vector< double, Device >& v )
......
@@ -4,7 +4,7 @@
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
void segmentedScan( Vector< double, Device >& v, Vector< bool, Device >& flags )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double sum( const Vector< double, Device >& v )
......
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
using namespace TNL::Algorithms;
template< typename Device >
double updateAndResidue( Vector< double, Device >& u, const Vector< double, Device >& delta_u, const double& tau )
......
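All of the reduction and scan examples above change in the same way: the header moves from `TNL/Containers/Algorithms/Reduction.h` to `TNL/Algorithms/Reduction.h` and the namespace from `TNL::Containers::Algorithms` to `TNL::Algorithms`. A sketch of a simple sum under the new layout; the `Reduction< Device >::reduce( size, reduction, fetch, zero )` signature and `getConstView()` are assumptions based on tutorial code of this period, not confirmed by this diff.

```cpp
#include <iostream>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/Reduction.h>   // new location

using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;        // new namespace

template< typename Device >
double sum( const Vector< double, Device >& v )
{
   auto view = v.getConstView();
   // fetch reads the i-th element; reduction combines two partial results
   auto fetch = [=] __cuda_callable__ ( int i ) -> double { return view[ i ]; };
   auto reduction = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; };
   // assumed entry point: reduce( size, reduction, fetch, identity )
   return Reduction< Device >::reduce( view.getSize(), reduction, fetch, 0.0 );
}

int main()
{
   Vector< double, Devices::Host > v( 10 );
   v.setValue( 1.0 );
   std::cout << "sum = " << sum( v ) << std::endl;   // prints 10
}
```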
@@ -24,7 +24,6 @@ void expressions()
b.evaluate( [] __cuda_callable__ ( int i )->RealType { return i - 5.0; } );
c = -5;
int arg;
std::cout << "a = " << a << std::endl;
std::cout << "b = " << b << std::endl;
std::cout << "c = " << c << std::endl;
......
@@ -12,13 +12,20 @@ Similarly to the STL, features provided by the TNL can be grouped into
several modules:
- _Core concepts_.
The main concept used in the TNL is the `Device` type which is used in most of
the other parts of the library. For data structures such as `Array` it
specifies where the data should be allocated, whereas for algorithms such as
`ParallelFor` it specifies how the algorithm should be executed.
The main concepts used in TNL are the _memory space_, which represents the
part of memory where the data are allocated, and the _execution model_,
which represents the way a given (typically parallel) algorithm is executed.
For example, data can be allocated in the main system memory, in the GPU
memory, or using the CUDA Unified Memory which can be accessed from the host
as well as from the GPU. On the other hand, algorithms can be executed using
either the host CPU or an accelerator (GPU), and for each there are many ways
to manage parallel execution. The usage of memory spaces is abstracted with
[allocators][allocators] and the execution model is represented by
[devices][devices]. See the [Core concepts][core concepts] page for details.
- _[Containers][containers]_.
TNL provides generic containers such as array, multidimensional array or array
views, which abstract data management on different hardware architectures.
views, which abstract data management and execution of common operations on
different hardware architectures.
- _Linear algebra._
TNL provides generic data structures and algorithms for linear algebra, such
as [vectors][vectors], [sparse matrices][matrices],
@@ -39,6 +46,9 @@ several modules:
[libpng](http://www.libpng.org/pub/png/libpng.html) for PNG files, or
[libjpeg](http://libjpeg.sourceforge.net/) for JPEG files.
[allocators]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Allocators.html
[devices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Devices.html
[core concepts]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/core_concepts.html
[containers]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Containers.html
[vectors]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/classTNL_1_1Containers_1_1Vector.html
[matrices]: https://mmg-gitlab.fjfi.cvut.cz/doc/tnl/namespaceTNL_1_1Matrices.html
......
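The allocator/device split described in the documentation hunk above can be illustrated on `Array`. A sketch, assuming the post-refactor template signature `Array< Value, Device, Index, Allocator >` and an `Allocators::CudaManaged` allocator backed by CUDA Unified Memory (both the signature and the header path are assumptions):

```cpp
#include <TNL/Containers/Array.h>
#include <TNL/Allocators/CudaManaged.h>   // assumed header path

using namespace TNL;
using namespace TNL::Containers;

int main()
{
   // the device selects the execution model; the allocator defaults to the
   // natural memory space of that device (plain host memory here)
   Array< double, Devices::Host > hostArray( 100 );
   hostArray.setValue( 1.0 );

   // assumed: overriding the allocator places the elements in CUDA Unified
   // Memory, while operations on the array still execute on the GPU
   Array< double, Devices::Cuda, int,
          Allocators::CudaManaged< double > > managedArray( 100 );
   managedArray.setValue( 2.0 );
}
```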
@@ -48,7 +48,6 @@ do
--offline-build ) OFFLINE_BUILD="yes" ;;
--with-clang=* ) WITH_CLANG="${option#*=}" ;;
--with-mpi=* ) WITH_MPI="${option#*=}" ;;
--with-mic=* ) WITH_MIC="${option#*=}" ;;
--with-cuda=* ) WITH_CUDA="${option#*=}" ;;
--with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";;
--with-openmp=* ) WITH_OPENMP="${option#*=}" ;;
@@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then
echo " --install=yes/no Enables the installation of TNL files."
echo " --offline-build=yes/no Disables online updates during the build. 'no' by default."
echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)."
echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default."
echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default."
@@ -110,7 +108,12 @@ else
export CC=gcc
fi
if hash ninja 2>/dev/null; then
if [[ ! $(command -v cmake) ]]; then
echo "Error: cmake is not installed. See http://www.cmake.org/download/" >&2
exit 1
fi
if [[ $(command -v ninja) ]]; then
generator=Ninja
make=ninja
check_file="build.ninja"
@@ -126,7 +129,6 @@ cmake_command=(
-DCMAKE_BUILD_TYPE=${BUILD}
-DCMAKE_INSTALL_PREFIX=${PREFIX}
-DOFFLINE_BUILD=${OFFLINE_BUILD}
-DWITH_MIC=${WITH_MIC}
-DWITH_CUDA=${WITH_CUDA}
-DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-DWITH_OPENMP=${WITH_OPENMP}
......
#!/bin/bash
set -e
BUILD_DEBUG="yes"
BUILD_RELEASE="yes"
OPTIONS=""
CMAKE_TEST=`which cmake`
if test x${CMAKE_TEST} = "x";
then
echo "Cmake is not installed on your system. Please install it by:"
echo ""
echo " sudo apt-get install cmake on Ubuntu and Debian based systems"
echo " sudo yum install cmake on RedHat, Fedora or CentOS"
echo " sudo zypper install cmake on OpenSuse"
echo ""
echo "You may also install it from the source code at:"
echo " http://www.cmake.org/download/"
exit 1