Commit dd9dd992 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber

Merge branch 'tutorials' into 'develop'

Tutorials

See merge request !41
parents 0b1c97a2 9cb46d76
......@@ -3,6 +3,7 @@
/Release
/Testing
/Documentation/html/
/Documentation/output_snippets/
/CMakeLists.txt.user
/Build
/coverage_report
......
......@@ -12,6 +12,7 @@ before_script:
# same job.
stages:
- build
- doc_build
- deploy
# default flags for cmake
......@@ -134,6 +135,10 @@ cuda_examples_Debug:
WITH_CUDA: "yes"
BUILD_TYPE: Debug
WITH_EXAMPLES: "yes"
# store output snippets for documentation
artifacts:
paths:
- Documentation/output_snippets/
cuda_examples_Release:
<<: *build_template
......@@ -398,12 +403,15 @@ mpi_benchmarks_tools_python_Release:
build documentation:
stage: build
stage: doc_build
only:
changes:
- Documentation/**/*
- src/TNL/**/*.{h,hpp}
- .gitlab-ci.yml
dependencies:
# the job which builds Documentation/output_snippets/
- cuda_examples_Debug
script:
- ./Documentation/build
artifacts:
......
......@@ -316,17 +316,13 @@ LINK_DIRECTORIES( ${LIBRARY_OUTPUT_PATH} )
add_subdirectory( src )
add_subdirectory( share )
#Check for Doxygen
if( ${WITH_DOC} )
find_package(Doxygen)
if( DOXYGEN_FOUND )
if( CMAKE_BUILD_TYPE MATCHES "Release" )
add_subdirectory( Documentation/Tutorials )
endif( CMAKE_BUILD_TYPE MATCHES "Release" )
else( DOXYGEN_FOUND )
message("Doxygen need to be installed to generate the doxygen documentation")
endif( DOXYGEN_FOUND )
endif( ${WITH_DOC} )
# Add subdirectories for examples included in the documentation
if( ${WITH_DOC} OR ${WITH_EXAMPLES} )
set( TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH "${CMAKE_SOURCE_DIR}/Documentation/output_snippets" )
file(MAKE_DIRECTORY ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH})
add_subdirectory( Documentation/Examples )
add_subdirectory( Documentation/Tutorials )
endif()
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Template Numerical Library")
set(CPACK_PACKAGE_VENDOR "MMG")
......
......@@ -503,7 +503,7 @@ HIDE_UNDOC_CLASSES = NO
# included in the documentation.
# The default value is: NO.
HIDE_FRIEND_COMPOUNDS = NO
HIDE_FRIEND_COMPOUNDS = YES
# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
# documentation blocks found inside the body of a function. If set to NO, these
......@@ -580,7 +580,7 @@ SORT_MEMBER_DOCS = YES
# this will also influence the order of the classes in the class list.
# The default value is: NO.
SORT_BRIEF_DOCS = NO
SORT_BRIEF_DOCS = YES
# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
# (brief and detailed) documentation of class members so that constructors and
......@@ -592,7 +592,7 @@ SORT_BRIEF_DOCS = NO
# detailed member documentation.
# The default value is: NO.
SORT_MEMBERS_CTORS_1ST = NO
SORT_MEMBERS_CTORS_1ST = YES
# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
# of group names into alphabetical order. If set to NO the group names will
......@@ -744,7 +744,7 @@ WARNINGS = YES
# will automatically be disabled.
# The default value is: YES.
WARN_IF_UNDOCUMENTED = YES
WARN_IF_UNDOCUMENTED = NO
# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
# potential errors in the documentation, such as not documenting some parameters
......@@ -923,11 +923,9 @@ EXCLUDE_SYMBOLS += TNL::Assert::* # internal namespace
# that contain example code fragments that are included (see the \include
# command).
EXAMPLE_PATH += ../src/Examples
EXAMPLE_PATH += Examples
EXAMPLE_PATH += Tutorials
# directories for output snippets
EXAMPLE_PATH += ../Release/Documentation/Tutorials
EXAMPLE_PATH += ../Debug/Documentation/Tutorials
EXAMPLE_PATH += output_snippets
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
......@@ -2120,8 +2118,9 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED = HAVE_MPI=1
HAVE_CUDA=1
PREDEFINED = DOXYGEN_ONLY
PREDEFINED += HAVE_MPI
PREDEFINED += HAVE_CUDA
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
......
ADD_SUBDIRECTORY( Containers )
ADD_EXECUTABLE( FileExample FileExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out OUTPUT FileExample.out )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(FileExampleCuda FileExampleCuda.cu)
ADD_CUSTOM_COMMAND( COMMAND FileExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleCuda.out OUTPUT FileExampleCuda.out )
ENDIF()
ADD_EXECUTABLE( FileExampleSaveAndLoad FileExampleSaveAndLoad.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileExampleSaveAndLoad > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleSaveAndLoad.out OUTPUT FileExampleSaveAndLoad.out )
ADD_EXECUTABLE( FileNameExample FileNameExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileNameExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExample.out OUTPUT FileNameExample.out )
ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeCoordinates FileNameExampleDistributedSystemNodeCoordinates.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileNameExampleDistributedSystemNodeCoordinates > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeCoordinates.out OUTPUT FileNameExampleDistributedSystemNodeCoordinates.out )
ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeId FileNameExampleDistributedSystemNodeId.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileNameExampleDistributedSystemNodeId > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeId.out OUTPUT FileNameExampleDistributedSystemNodeId.out )
ADD_EXECUTABLE( ObjectExample_getType ObjectExample_getType.cpp )
ADD_CUSTOM_COMMAND( COMMAND ObjectExample_getType > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ObjectExample_getType.out OUTPUT ObjectExample_getType.out )
ADD_EXECUTABLE( ParameterContainerExample ParameterContainerExample.cpp )
ADD_EXECUTABLE( ParseObjectTypeExample ParseObjectTypeExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND ParseObjectTypeExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParseObjectTypeExample.out OUTPUT ParseObjectTypeExample.out )
ADD_EXECUTABLE( StringExample StringExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExample.out OUTPUT StringExample.out )
ADD_EXECUTABLE( StringExampleGetAllocatedSize StringExampleGetAllocatedSize.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExampleGetAllocatedSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleGetAllocatedSize.out OUTPUT StringExampleGetAllocatedSize.out )
ADD_EXECUTABLE( StringExampleReplace StringExampleReplace.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExampleReplace > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleReplace.out OUTPUT StringExampleReplace.out )
ADD_EXECUTABLE( StringExampleSetSize StringExampleSetSize.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExampleSetSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSetSize.out OUTPUT StringExampleSetSize.out )
ADD_EXECUTABLE( StringExampleSplit StringExampleSplit.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExampleSplit > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSplit.out OUTPUT StringExampleSplit.out )
ADD_EXECUTABLE( StringExampleStrip StringExampleStrip.cpp )
ADD_CUSTOM_COMMAND( COMMAND StringExampleStrip > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleStrip.out OUTPUT StringExampleStrip.out )
ADD_EXECUTABLE( TimerExample TimerExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND TimerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExample.out OUTPUT TimerExample.out )
ADD_EXECUTABLE( TimerExampleLogger TimerExampleLogger.cpp )
ADD_CUSTOM_COMMAND( COMMAND TimerExampleLogger > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExampleLogger.out OUTPUT TimerExampleLogger.out )
ADD_CUSTOM_TARGET( RunExamples ALL DEPENDS
FileExample.out
FileExampleSaveAndLoad.out
FileNameExample.out
FileNameExampleDistributedSystemNodeCoordinates.out
FileNameExampleDistributedSystemNodeId.out
ObjectExample_getType.out
ParseObjectTypeExample.out
StringExample.out
StringExampleGetAllocatedSize.out
StringExampleReplace.out
StringExampleSetSize.out
StringExampleSplit.out
StringExampleStrip.out
TimerExample.out
TimerExampleLogger.out )
if( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunExamples-cuda ALL DEPENDS
FileExampleCuda.out )
ENDIF()
......@@ -14,60 +14,60 @@ void arrayViewExample()
using ArrayType = Containers::Array< int, Device >;
using IndexType = typename ArrayType::IndexType;
using ViewType = Containers::ArrayView< int, Device >;
ArrayType _a1( size ), _a2( size );
ViewType a1 = _a1.getView();
ViewType a2 = _a2.getView();
ArrayType a1( size ), a2( size );
ViewType a1_view = a1.getView();
ViewType a2_view = a2.getView();
/***
* You may initialize the array view using setElement
*/
for( int i = 0; i < size; i++ )
a1.setElement( i, i );
a1_view.setElement( i, i );
/***
* You may also assign a value to all array view elements ...
*/
a2 = 0;
a2_view = 0;
/***
* Simple checks of array view values can be done as follows ...
*/
if( a1.containsValue( 1 ) )
if( a1_view.containsValue( 1 ) )
std::cout << "a1 contains value 1." << std::endl;
if( a1.containsValue( size ) )
if( a1_view.containsValue( size ) )
std::cout << "a1 contains value " << size << "." << std::endl;
if( a1.containsOnlyValue( 0 ) )
if( a1_view.containsOnlyValue( 0 ) )
std::cout << "a2 contains only value 0." << std::endl;
/***
* A more efficient way of manipulating array view elements is with lambda functions
*/
ArrayType _a3( size );
ViewType a3 = _a3.getView();
ArrayType a3( size );
ViewType a3_view = a3.getView();
auto f1 = [] __cuda_callable__ ( IndexType i ) -> int { return 2 * i; };
a3.evaluate( f1 );
a3_view.evaluate( f1 );
for( int i = 0; i < size; i++ )
if( a3.getElement( i ) != 2 * i )
if( a3_view.getElement( i ) != 2 * i )
std::cerr << "Something is wrong!!!" << std::endl;
/***
* You may swap array view data with the swap method.
*/
a1.swap( a3 );
a1_view.swap( a3_view );
/***
* You may save it to a file and load it again
*/
File( "a1.tnl", std::ios_base::out ) << a1;
File( "a1.tnl", std::ios_base::in ) >> a2;
File( "a1_view.tnl", std::ios_base::out ) << a1_view;
File( "a1_view.tnl", std::ios_base::in ) >> a2_view;
std::remove( "a1.tnl" );
std::remove( "a1_view.tnl" );
if( a2 != a1 )
if( a2_view != a1_view )
std::cerr << "Something is wrong!!!" << std::endl;
std::cout << "a2 = " << a2 << std::endl;
std::cout << "a2_view = " << a2_view << std::endl;
}
int main()
......
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( ArrayExampleCuda ArrayExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayExampleCuda > ArrayExample.out OUTPUT ArrayExample.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayExample.out OUTPUT ArrayExample.out )
ELSE()
ADD_EXECUTABLE( ArrayExample ArrayExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND ArrayExample > ArrayExample.out OUTPUT ArrayExample.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayExample.out OUTPUT ArrayExample.out )
ENDIF()
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( ArrayViewExampleCuda ArrayViewExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewExampleCuda > ArrayViewExample.out OUTPUT ArrayViewExample.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayViewExample.out OUTPUT ArrayViewExample.out )
ELSE()
ADD_EXECUTABLE( ArrayViewExample ArrayViewExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewExample > ArrayViewExample.out OUTPUT ArrayViewExample.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayViewExample.out OUTPUT ArrayViewExample.out )
ENDIF()
ADD_EXECUTABLE( VectorExample VectorExample.cpp )
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunContainersExamples-cuda ALL DEPENDS
......
......@@ -16,7 +16,7 @@ int main()
* Save array to file.
*/
File file;
file.open( "test-file.tnl", std::ios_base::out | std::ios_base::trunc );
file.open( "file-example-cuda-test-file.tnl", std::ios_base::out | std::ios_base::trunc );
file.save< double, double, Devices::Host >( doubleArray, size );
file.close();
......@@ -30,7 +30,7 @@ int main()
/***
* Read array from the file to device
*/
file.open( "test-file.tnl", std::ios_base::in );
file.open( "file-example-cuda-test-file.tnl", std::ios_base::in );
file.load< double, double, Devices::Cuda >( deviceArray, size );
file.close();
......
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( ArrayAllocation ArrayAllocation.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayAllocation > ArrayAllocation.out OUTPUT ArrayAllocation.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayAllocation > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayAllocation.out OUTPUT ArrayAllocation.out )
CUDA_ADD_EXECUTABLE( ArrayBinding-1 ArrayBinding-1.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-1 > ArrayBinding-1.out OUTPUT ArrayBinding-1.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-1 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayBinding-1.out OUTPUT ArrayBinding-1.out )
CUDA_ADD_EXECUTABLE( ArrayBinding-2 ArrayBinding-2.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-2 > ArrayBinding-2.out OUTPUT ArrayBinding-2.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayBinding-2.out OUTPUT ArrayBinding-2.out )
CUDA_ADD_EXECUTABLE( ArrayBinding-3 ArrayBinding-3.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-3 > ArrayBinding-3.out OUTPUT ArrayBinding-3.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayBinding-3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayBinding-3.out OUTPUT ArrayBinding-3.out )
CUDA_ADD_EXECUTABLE( ArrayIO ArrayIO.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayIO > ArrayIO.out OUTPUT ArrayIO.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayIO > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayIO.out OUTPUT ArrayIO.out )
CUDA_ADD_EXECUTABLE( ArrayView-1 ArrayView-1.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayView-1 > ArrayView-1.out OUTPUT ArrayView-1.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayView-1 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayView-1.out OUTPUT ArrayView-1.out )
CUDA_ADD_EXECUTABLE( ArrayView-2 ArrayView-2.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayView-2 > ArrayView-2.out OUTPUT ArrayView-2.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayView-2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayView-2.out OUTPUT ArrayView-2.out )
CUDA_ADD_EXECUTABLE( ArrayViewEvaluate ArrayViewEvaluate.cu )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewEvaluate > ArrayViewEvaluate.out OUTPUT ArrayViewEvaluate.out )
ADD_CUSTOM_COMMAND( COMMAND ArrayViewEvaluate > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ArrayViewEvaluate.out OUTPUT ArrayViewEvaluate.out )
CUDA_ADD_EXECUTABLE( ContainsValue ContainsValue.cu )
ADD_CUSTOM_COMMAND( COMMAND ContainsValue > ContainsValue.out OUTPUT ContainsValue.out )
ADD_CUSTOM_COMMAND( COMMAND ContainsValue > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ContainsValue.out OUTPUT ContainsValue.out )
CUDA_ADD_EXECUTABLE( ElementsAccessing-1 ElementsAccessing-1.cu )
ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-1 > ElementsAccessing-1.out OUTPUT ElementsAccessing-1.out )
ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-1 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ElementsAccessing-1.out OUTPUT ElementsAccessing-1.out )
CUDA_ADD_EXECUTABLE( ElementsAccessing-2 ElementsAccessing-2.cu )
ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-2 > ElementsAccessing-2.out OUTPUT ElementsAccessing-2.out )
ADD_CUSTOM_COMMAND( COMMAND ElementsAccessing-2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ElementsAccessing-2.out OUTPUT ElementsAccessing-2.out )
ADD_EXECUTABLE( StaticArrayExample StaticArrayExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND StaticArrayExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StaticArrayExample.out OUTPUT StaticArrayExample.out )
ENDIF()
IF( BUILD_CUDA )
......@@ -35,5 +37,6 @@ ADD_CUSTOM_TARGET( TutorialsArrays-cuda ALL DEPENDS
ContainsValue.out
ElementsAccessing-1.out
ElementsAccessing-2.out
ArrayViewEvaluate.out )
ArrayViewEvaluate.out
StaticArrayExample.out )
ENDIF()
#include <iostream>
#include <TNL/Containers/StaticArray.h>
#include <TNL/File.h>
using namespace TNL;
using namespace TNL::Containers;
int main( int argc, char* argv[] )
{
// three ways of construction: default, element-wise and with an initializer list
StaticArray< 3, int > a1, a2( 1, 2, 3 ), a3{ 4, 3, 2 };
a1 = 0.0;   // sets all elements of a1 to zero
std::cout << "a1 = " << a1 << std::endl;
std::cout << "a2 = " << a2 << std::endl;
std::cout << "a3 = " << a3 << std::endl;
// save a3 to a file and load it back into a1
File( "static-array-example-file.tnl", std::ios::out ) << a3;
File( "static-array-example-file.tnl", std::ios::in ) >> a1;
std::cout << "a1 = " << a1 << std::endl;
// sort the elements of a1 in ascending order
a1.sort();
std::cout << "Sorted a1 = " << a1 << std::endl;
}
......@@ -2,7 +2,7 @@
## Introduction
This tutorial introduces arrays in TNL. Array is one of the most important structure for memory management. Methods implemented in arrays are particularly useful for GPU programming. From this point of view, the reader will learn how to easily allocate memory on GPU, transfer data between GPU and CPU but also, how to initialize data allocated on GPU. In addition, the resulting code is hardware platform independent, so it can be ran on CPU without any changes.
This tutorial introduces arrays in TNL. There are three types: common arrays with dynamic allocation, static arrays allocated on the stack and distributed arrays with dynamic allocation. Arrays are among the most important structures for memory management. The methods implemented in arrays are particularly useful for GPU programming: the reader will learn how to easily allocate memory on the GPU, transfer data between the GPU and the CPU, and also how to initialize data allocated on the GPU. In addition, the resulting code is hardware-platform independent, so it can be run on both CPU and GPU without any changes.
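As a quick, minimal sketch of what this means in practice (this is not one of the shipped examples; it assumes the `Array` interface covered in the sections below, with the GPU part guarded by `HAVE_CUDA`):
```cpp
#include <TNL/Containers/Array.h>

using namespace TNL;
using namespace TNL::Containers;

int main()
{
   // array allocated in host (CPU) memory
   Array< double, Devices::Host > host_a( 10 );
   host_a.setValue( 1.0 );   // initialize all elements on the host

#ifdef HAVE_CUDA
   // changing only the Device template parameter allocates the array on the GPU
   Array< double, Devices::Cuda > cuda_a( 10 );
   cuda_a = host_a;          // copies the data from the CPU to the GPU
#endif
}
```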
## Table of Contents
1. [Arrays](#arrays)
......@@ -15,6 +15,7 @@ This tutorial introduces arrays in TNL. Array is one of the most important struc
5. [Checking the array contents](#checking_the_array_contents)
6. [IO operations with arrays](#io_operations_with-arrays)
2. [Static arrays](#static_arrays)
3. [Distributed arrays](#distributed_arrays)
## Arrays <a name="arrays"></a>
......@@ -148,3 +149,18 @@ Output:
\include ArrayIO.out
## Static arrays <a name="static_arrays"></a>
Static arrays are allocated on the stack and thus they can be created even in CUDA kernels. Their size is fixed and given by a template parameter. StaticArray is a templated class defined in the namespace `TNL::Containers` with two template parameters:
* `Size` is the array size.
* `Value` is the type of data stored in the array.
The interface of StaticArray is very similar to Array but much simpler. It contains a set of common constructors. Array elements can be accessed via `operator[]` and also using the methods `x()`, `y()` and `z()` when it makes sense. See the following example for a typical use of StaticArray.
\include StaticArrayExample.cpp
The output looks as follows:
\include StaticArrayExample.out
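The example does not exercise the `x()`, `y()` and `z()` accessors mentioned above; a minimal sketch of that access style (not a shipped example) could look like:
```cpp
#include <iostream>
#include <TNL/Containers/StaticArray.h>

using namespace TNL::Containers;

int main()
{
   StaticArray< 3, double > point( 1.0, 2.0, 3.0 );
   point.x() = 5.0;   // refers to the same element as point[ 0 ]
   std::cout << point.x() << " " << point.y() << " " << point.z() << std::endl;
   std::cout << point[ 0 ] << " " << point[ 1 ] << " " << point[ 2 ] << std::endl;
}
```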
## Distributed arrays <a name="distributed_arrays"></a>
add_subdirectory( Arrays )
add_subdirectory( Vectors )
\ No newline at end of file
add_subdirectory( Vectors )
add_subdirectory( ReductionAndScan )
\ No newline at end of file
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( SumExample SumExample.cu )
ADD_CUSTOM_COMMAND( COMMAND SumExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SumExample.out OUTPUT SumExample.out )
CUDA_ADD_EXECUTABLE( ProductExample ProductExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ProductExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ProductExample.out OUTPUT ProductExample.out )
CUDA_ADD_EXECUTABLE( ScalarProductExample ScalarProductExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ScalarProductExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ScalarProductExample.out OUTPUT ScalarProductExample.out )
CUDA_ADD_EXECUTABLE( MaximumNormExample MaximumNormExample.cu )
ADD_CUSTOM_COMMAND( COMMAND MaximumNormExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MaximumNormExample.out OUTPUT MaximumNormExample.out )
CUDA_ADD_EXECUTABLE( ComparisonExample ComparisonExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ComparisonExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ComparisonExample.out OUTPUT ComparisonExample.out )
CUDA_ADD_EXECUTABLE( UpdateAndResidueExample UpdateAndResidueExample.cu )
ADD_CUSTOM_COMMAND( COMMAND UpdateAndResidueExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UpdateAndResidueExample.out OUTPUT UpdateAndResidueExample.out )
CUDA_ADD_EXECUTABLE( MapReduceExample-1 MapReduceExample-1.cu )
ADD_CUSTOM_COMMAND( COMMAND MapReduceExample-1 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MapReduceExample-1.out OUTPUT MapReduceExample-1.out )
CUDA_ADD_EXECUTABLE( MapReduceExample-2 MapReduceExample-2.cu )
ADD_CUSTOM_COMMAND( COMMAND MapReduceExample-2 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MapReduceExample-2.out OUTPUT MapReduceExample-2.out )
CUDA_ADD_EXECUTABLE( MapReduceExample-3 MapReduceExample-3.cu )
ADD_CUSTOM_COMMAND( COMMAND MapReduceExample-3 > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/MapReduceExample-3.out OUTPUT MapReduceExample-3.out )
CUDA_ADD_EXECUTABLE( ReductionWithArgument ReductionWithArgument.cu )
ADD_CUSTOM_COMMAND( COMMAND ReductionWithArgument > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ReductionWithArgument.out OUTPUT ReductionWithArgument.out )
CUDA_ADD_EXECUTABLE( ScanExample ScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ScanExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ScanExample.out OUTPUT ScanExample.out )
CUDA_ADD_EXECUTABLE( ExclusiveScanExample ExclusiveScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND ExclusiveScanExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ExclusiveScanExample.out OUTPUT ExclusiveScanExample.out )
CUDA_ADD_EXECUTABLE( SegmentedScanExample SegmentedScanExample.cu )
ADD_CUSTOM_COMMAND( COMMAND SegmentedScanExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SegmentedScanExample.out OUTPUT SegmentedScanExample.out )
ENDIF()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( TutorialsReduction-cuda ALL DEPENDS
SumExample.out
ProductExample.out
ScalarProductExample.out
MaximumNormExample.out
ComparisonExample.out
UpdateAndResidueExample.out
MapReduceExample-1.out
MapReduceExample-2.out
MapReduceExample-3.out
ReductionWithArgument.out
ScanExample.out
ExclusiveScanExample.out
SegmentedScanExample.out )
ENDIF()
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
bool comparison( const Vector< double, Device >& u, const Vector< double, Device >& v )
{
auto u_view = u.getConstView();
auto v_view = v.getConstView();
/***
* Fetch compares corresponding elements of both vectors
*/
auto fetch = [=] __cuda_callable__ ( int i )->bool { return ( u_view[ i ] == v_view[ i ] ); };
/***
* Reduce performs logical AND on intermediate results obtained by fetch.
*/
auto reduce = [] __cuda_callable__ ( const bool& a, const bool& b ) { return a && b; };
return Reduction< Device >::reduce( v_view.getSize(), reduce, fetch, true );
}
int main( int argc, char* argv[] )
{
Vector< double, Devices::Host > host_u( 10 ), host_v( 10 );
host_u = 1.0;
host_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } );
std::cout << "host_u = " << host_u << std::endl;
std::cout << "host_v = " << host_v << std::endl;
std::cout << "Comparison of host_u and host_v is: " << ( comparison( host_u, host_v ) ? "'true'" : "'false'" ) << "." << std::endl;
std::cout << "Comparison of host_u and host_u is: " << ( comparison( host_u, host_u ) ? "'true'" : "'false'" ) << "." << std::endl;
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_u( 10 ), cuda_v( 10 );
cuda_u = 1.0;
cuda_v.evaluate( [] __cuda_callable__ ( int i )->double { return 2 * ( i % 2 ) - 1; } );
std::cout << "cuda_u = " << cuda_u << std::endl;
std::cout << "cuda_v = " << cuda_v << std::endl;
std::cout << "Comparison of cuda_u and cuda_v is: " << ( comparison( cuda_u, cuda_v ) ? "'true'" : "'false'" ) << "." << std::endl;
std::cout << "Comparison of cuda_u and cuda_u is: " << ( comparison( cuda_u, cuda_u ) ? "'true'" : "'false'" ) << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
ComparisonExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Containers/StaticVector.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
void scan( Vector< double, Device >& v )
{
/***
* Reduction is sum of two numbers.
*/
auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; };
/***
* As parameters, we pass the vector on which the scan is to be performed, the interval
* where the scan is performed, the lambda function used by the scan and the
* zero element (the neutral element) of the 'sum' operation.
*/
Scan< Device, ScanType::Exclusive >::perform( v, 0, v.getSize(), reduce, 0.0 );
}
int main( int argc, char* argv[] )
{
/***
* Firstly, test the exclusive prefix sum with vectors allocated on CPU.
*/
Vector< double, Devices::Host > host_v( 10 );
host_v = 1.0;
std::cout << "host_v = " << host_v << std::endl;
scan( host_v );
std::cout << "The exclusive prefix sum of the host vector is " << host_v << "." << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( 10 );
cuda_v = 1.0;
std::cout << "cuda_v = " << cuda_v << std::endl;
scan( cuda_v );
std::cout << "The exclusive prefix sum of the CUDA vector is " << cuda_v << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
ExclusiveScanExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Timer.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Containers::Algorithms;
template< typename Device >
double mapReduce( Vector< double, Device >& u )
{
auto u_view = u.getView();
// fetch returns the i-th element if it is positive, zero otherwise
auto fetch = [=] __cuda_callable__ ( int i )->double {
return u_view[ i ] > 0 ? u_view[ i ] : 0.0; };
// reduce sums the values produced by fetch
auto reduce = [] __cuda_callable__ ( const double& a, const double& b ) { return a + b; };
return Reduction< Device >::reduce( u_view.getSize(), reduce, fetch, 0.0 );
}
int main( int argc, char* argv[] )
{
Timer timer;
Vector< double, Devices::Host > host_u( 10 );
host_u.evaluate( [] __cuda_callable__ ( int i ) { return sin( ( double ) i ); } );
double result = mapReduce( host_u );
std::cout << "host_u = " << host_u << std::endl;
std::cout << "Sum of the positive numbers is:" << result << std::endl;
#ifdef HAVE_CUDA