Commit 9c6f9534 authored by Tomáš Oberhuber

Merge branch 'tutorials' into 'develop'

Tutorials

See merge request !44
parents b1055115 91166cb2
Showing 462 additions and 15 deletions
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ELSE()
ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ENDIF()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS
ParallelForExample.out
)
ELSE()
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
ParallelForExample.out
)
ENDIF()
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void initMeshFunction( const int xSize,
const int ySize,
Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, int j, const int xSize, const double c ) mutable {
view[ j * xSize + i ] = c; };
ParallelFor2D< Device >::exec( 0, 0, xSize, ySize, init, xSize, c );
}
int main( int argc, char* argv[] )
{
/***
* Define the dimensions of the 2D mesh function.
*/
const int xSize( 10 ), ySize( 10 );
const int size = xSize * ySize;
/***
* First, test the mesh function initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( size );
initMeshFunction( xSize, ySize, host_v, 1.0 );
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( size );
initMeshFunction( xSize, ySize, cuda_v, 1.0 );
#endif
return EXIT_SUCCESS;
}
ParallelForExample-2D.cpp
\ No newline at end of file
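For orientation, the ParallelFor2D call in the example above is just a parallel version of two nested loops over the index ranges, with the trailing arguments ( xSize, c ) forwarded to every invocation of the lambda. A rough serial sketch of what it computes (the actual traversal order and the dispatch to OpenMP threads or a CUDA kernel depend on the Device template parameter):

// Serial sketch only; not how TNL actually executes the loop.
for( int j = 0; j < ySize; j++ )
   for( int i = 0; i < xSize; i++ )
      init( i, j, xSize, c );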
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void initMeshFunction( const int xSize,
const int ySize,
const int zSize,
Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, int j, int k, const int xSize, const int ySize, const double c ) mutable {
view[ ( k * ySize + j ) * xSize + i ] = c; };
ParallelFor3D< Device >::exec( 0, 0, 0, xSize, ySize, zSize, init, xSize, ySize, c );
}
int main( int argc, char* argv[] )
{
/***
* Define the dimensions of the 3D mesh function.
*/
const int xSize( 10 ), ySize( 10 ), zSize( 10 );
const int size = xSize * ySize * zSize;
/***
* First, test the mesh function initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( size );
initMeshFunction( xSize, ySize, zSize, host_v, 1.0 );
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( size );
initMeshFunction( xSize, ySize, zSize, cuda_v, 1.0 );
#endif
return EXIT_SUCCESS;
}
ParallelForExample-3D.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
/****
* Set all elements of the vector v to the constant c.
*/
template< typename Device >
void initVector( Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, const double c ) mutable {
view[ i ] = c; };
Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), init, c );
}
int main( int argc, char* argv[] )
{
/***
* First, test the vector initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( 10 );
initVector( host_v, 1.0 );
std::cout << "host_v = " << host_v << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( 10 );
initVector( cuda_v, 1.0 );
std::cout << "cuda_v = " << cuda_v << std::endl;
#endif
return EXIT_SUCCESS;
}
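On Devices::Host, the ParallelFor call in the example above boils down to a loop over the index range, parallelized with OpenMP when TNL is built with OpenMP support. A rough host-side sketch (assuming OpenMP is enabled; otherwise it is a plain serial loop):

// Host-side sketch of ParallelFor< Devices::Host >::exec( 0, v.getSize(), init, c ).
const int n = v.getSize();
#pragma omp parallel for
for( int i = 0; i < n; i++ )
   init( i, c );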
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void vectorSum( const Vector< double, Device >& v1,
const Vector< double, Device >& v2,
const double& c,
Vector< double, Device >& result )
{
/****
* Get vector views which can be captured by the lambda.
*/
auto v1_view = v1.getConstView();
auto v2_view = v2.getConstView();
auto result_view = result.getView();
/****
* The sum function.
*/
auto sum = [=] __cuda_callable__ ( int i, const double c ) mutable {
result_view[ i ] = v1_view[ i ] + v2_view[ i ] + c; };
ParallelFor< Device >::exec( 0, v1.getSize(), sum, c );
}
int main( int argc, char* argv[] )
{
/***
* First, test the vector sum on the CPU.
*/
Vector< double, Devices::Host > host_v1( 10 ), host_v2( 10 ), host_result( 10 );
host_v1 = 1.0;
host_v2.evaluate( []__cuda_callable__ ( int i )->double { return i; } );
vectorSum( host_v1, host_v2, 2.0, host_result );
std::cout << "host_v1 = " << host_v1 << std::endl;
std::cout << "host_v2 = " << host_v2 << std::endl;
std::cout << "The sum of the vectors on CPU is " << host_result << "." << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v1( 10 ), cuda_v2( 10 ), cuda_result( 10 );
cuda_v1 = 1.0;
cuda_v2.evaluate( []__cuda_callable__ ( int i )->double { return i; } );
vectorSum( cuda_v1, cuda_v2, 2.0, cuda_result );
std::cout << "cuda_v1 = " << cuda_v1 << std::endl;
std::cout << "cuda_v2 = " << cuda_v2 << std::endl;
std::cout << "The sum of the vectors on GPU is " << cuda_result << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
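Note that the constant c does not have to be forwarded through exec as an extra argument; it can just as well be captured by the lambda itself. A minimal sketch of this alternative, assuming exec also accepts a lambda that takes only the index:

// Variant capturing c by value instead of forwarding it through exec.
auto sum = [=] __cuda_callable__ ( int i ) mutable {
   result_view[ i ] = v1_view[ i ] + v2_view[ i ] + c; };
ParallelFor< Device >::exec( 0, v1.getSize(), sum );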
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/StaticVector.h>
#include <TNL/Algorithms/StaticFor.h>
using namespace TNL;
using namespace TNL::Containers;
int main( int argc, char* argv[] )
{
/****
* Create two static vectors
*/
const int Size( 3 );
StaticVector< Size, double > a, b;
a = 1.0;
b = 2.0;
double sum( 0.0 );
/****
* Add a constant to the vector b, store the result in a and accumulate the sum of a's elements.
*/
auto addition = [&]( int i, const double& c ) { a[ i ] = b[ i ] + c; sum += a[ i ]; };
Algorithms::StaticFor< 0, Size >::exec( addition, 3.14 );
std::cout << "a = " << a << std::endl;
std::cout << "sum = " << sum << std::endl;
}
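StaticFor unrolls the loop at compile time, so the call above is roughly equivalent to the following sequence of calls (a sketch of the semantics only):

addition( 0, 3.14 );
addition( 1, 3.14 );
addition( 2, 3.14 );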
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/StaticVector.h>
#include <TNL/Algorithms/TemplateStaticFor.h>
using namespace TNL;
using namespace TNL::Containers;
const int Size( 5 );
template< int I >
struct LoopBody
{
static void exec( const StaticVector< Size, double >& v ) {
std::cout << "v[ " << I << " ] = " << v[ I ] << std::endl;
}
};
int main( int argc, char* argv[] )
{
/****
* Initialize the static vector.
*/
StaticVector< Size, double > v{ 1.0, 2.0, 3.0, 4.0, 5.0 };
/****
* Print out the vector using template parameters for indexing.
*/
Algorithms::TemplateStaticFor< 0, Size, LoopBody >::exec( v );
}
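Unlike StaticFor, here the loop index is a genuine template parameter, so TemplateStaticFor instantiates LoopBody once per index. The call above roughly expands to (a sketch of the semantics only):

LoopBody< 0 >::exec( v );
LoopBody< 1 >::exec( v );
LoopBody< 2 >::exec( v );
LoopBody< 3 >::exec( v );
LoopBody< 4 >::exec( v );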
ADD_SUBDIRECTORY( Algorithms )
ADD_SUBDIRECTORY( Containers )
ADD_SUBDIRECTORY( Pointers )
ADD_EXECUTABLE( FileExample FileExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out OUTPUT FileExample.out )
@@ -7,16 +7,15 @@ using namespace std;
 int main()
 {
-   Containers::Vector<int> vector1;
-   vector1.setSize(5);
-   vector1.setValue(0);
-   cout << "Does vector contain 1?" << vector1.containsValue(1) << endl;
-   cout << "Does vector contain only zeros?" << vector1.containsOnlyValue(0) << endl;
+   Containers::Vector<int> vector1( 5 );
+   vector1 = 0;
+   cout << "Does vector contain 1?" << vector1.containsValue( 1 ) << endl;
+   cout << "Does vector contain only zeros?" << vector1.containsOnlyValue( 0 ) << endl;
-   Containers::Vector<int> vector2(3);
-   vector2.setValue(1);
-   vector2.swap(vector1);
-   vector2.setElement(2,4);
+   Containers::Vector<int> vector2( 3 );
+   vector2 = 1;
+   vector2.swap( vector1 );
+   vector2.setElement( 2, 4 );
    cout << "First vector:" << vector1.getData() << endl;
    cout << "Second vector:" << vector2.getData() << endl;
@@ -24,10 +23,11 @@ int main()
    vector2.reset();
    cout << "Second vector after reset:" << vector2.getData() << endl;
-   /*Containers::Vector<int> vect = {1, 2, -3, 3};
-   cout << "The smallest element is:" << vect.min() << endl;
-   cout << "The absolute biggest element is:" << vect.absMax() << endl;
-   cout << "Sum of all vector elements:" << vect.sum() << endl;
-   vect.scalarMultiplication(2);*/
+   Containers::Vector<int> vect = { 1, 2, -3, 3 };
+   cout << "The smallest element is:" << min( vect ) << endl;
+   cout << "The absolute biggest element is:" << max( abs( vect ) ) << endl;
+   cout << "Sum of all vector elements:" << sum( vect ) << endl;
+   vect *= 2.0;
+   cout << "Vector multiplied by 2:" << vect << endl;
 }
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(UniquePointerExampleCuda UniquePointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND UniquePointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
CUDA_ADD_EXECUTABLE(SharedPointerExampleCuda SharedPointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND SharedPointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out )
CUDA_ADD_EXECUTABLE(DevicePointerExampleCuda DevicePointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND DevicePointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out )
ADD_CUSTOM_TARGET( RunPointersExamples ALL DEPENDS
UniquePointerExample.out
SharedPointerExample.out
DevicePointerExample.out
)
ENDIF()
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/DevicePointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
struct Tuple
{
Tuple( ArrayCuda& _a1, ArrayCuda& _a2 ):
a1( _a1 ), a2( _a2 ){};
Pointers::DevicePointer< ArrayCuda > a1, a2;
};
#ifdef HAVE_CUDA
__global__ void printTuple( const Tuple t )
{
printf( "Tuple size is: %d\n", t.a1->getSize() );
for( int i = 0; i < t.a1->getSize(); i++ )
{
printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] );
printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] );
}
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create a tuple of arrays and print them in a CUDA kernel.
*/
#ifdef HAVE_CUDA
ArrayCuda a1( 3 ), a2( 3 );
Tuple t( a1, a2 );
a1 = 1;
a2 = 2;
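// The smart pointers keep an image of the arrays on the device; after the
// host-side modifications above, the images have to be synchronized before
// the kernel launch.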
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
/***
* Resize the arrays
*/
a1.setSize( 5 );
a2.setSize( 5 );
a1 = 3;
a2 = 4;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
#endif
return EXIT_SUCCESS;
}
DevicePointerExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/SharedPointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
struct Tuple
{
Tuple( const int size ):
a1( size ), a2( size ){};
void setSize( const int size )
{
a1->setSize( size );
a2->setSize( size );
}
Pointers::SharedPointer< ArrayCuda > a1, a2;
};
#ifdef HAVE_CUDA
__global__ void printTuple( const Tuple t )
{
printf( "Tuple size is: %d\n", t.a1->getSize() );
for( int i = 0; i < t.a1->getSize(); i++ )
{
printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] );
printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] );
}
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create a tuple of arrays and print them in a CUDA kernel.
*/
#ifdef HAVE_CUDA
Tuple t( 3 );
*t.a1 = 1;
*t.a2 = 2;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
/***
* Resize the arrays
*/
t.setSize( 5 );
*t.a1 = 3;
*t.a2 = 4;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
#endif
return EXIT_SUCCESS;
}
SharedPointerExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/UniquePointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
#ifdef HAVE_CUDA
__global__ void printArray( const ArrayCuda* ptr )
{
printf( "Array size is: %d\n", ptr->getSize() );
for( int i = 0; i < ptr->getSize(); i++ )
printf( "a[ %d ] = %d \n", i, ( *ptr )[ i ] );
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create an array and print its elements in a CUDA kernel.
*/
#ifdef HAVE_CUDA
Pointers::UniquePointer< ArrayCuda > array_ptr( 10 );
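// modifyData< Devices::Host >() gives writable access to the host-side image
// of the array, while getData< Devices::Cuda >() below returns the image that
// lives on the device and can be passed to the kernel.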
array_ptr.modifyData< Devices::Host >() = 1;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printArray<<< 1, 1 >>>( &array_ptr.getData< Devices::Cuda >() );
/***
* Resize the array and print it again
*/
array_ptr.modifyData< Devices::Host >().setSize( 5 );
array_ptr.modifyData< Devices::Host >() = 2;
std::cout << array_ptr.modifyData< Devices::Host >().getSize() << std::endl;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printArray<<< 1, 1 >>>( &array_ptr.getData< Devices::Cuda >() );
#endif
return EXIT_SUCCESS;
}
UniquePointerExample.cpp
\ No newline at end of file
-\page tutorial_01_arrays Arrays tutorial
+\page tutorial_Arrays Arrays tutorial
## Introduction