Commit 9c6f9534 authored by Tomáš Oberhuber

Merge branch 'tutorials' into 'develop'

Tutorials

See merge request !44
parents b1055115 91166cb2
Showing 462 additions and 15 deletions
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(ParallelForExampleCuda ParallelForExample.cu)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ELSE()
ADD_EXECUTABLE(ParallelForExample ParallelForExample.cpp)
ADD_CUSTOM_COMMAND( COMMAND ParallelForExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParallelForExample.out OUTPUT ParallelForExample.out )
ENDIF()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunAlgorithmsExamples-cuda ALL DEPENDS
ParallelForExample.out
)
ELSE()
ADD_CUSTOM_TARGET( RunAlgorithmsExamples ALL DEPENDS
ParallelForExample.out
)
ENDIF()
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void initMeshFunction( const int xSize,
const int ySize,
Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, int j, const int xSize, const double c ) mutable {
view[ j * xSize + i ] = c; };
ParallelFor2D< Device >::exec( 0, 0, xSize, ySize, init, xSize, c );
}
int main( int argc, char* argv[] )
{
/***
* Define the dimensions of the 2D mesh function.
*/
const int xSize( 10 ), ySize( 10 );
const int size = xSize * ySize;
/***
* First, test the mesh function initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( size );
initMeshFunction( xSize, ySize, host_v, 1.0 );
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( size );
initMeshFunction( xSize, ySize, cuda_v, 1.0 );
#endif
return EXIT_SUCCESS;
}
ParallelForExample-2D.cpp
\ No newline at end of file
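For orientation, the ParallelFor2D call in the example above is just a parallel version of two nested loops over the index ranges, with the trailing arguments ( xSize, c ) forwarded to every invocation of the lambda. A rough serial sketch of what it computes (the actual traversal order and the dispatch to OpenMP threads or a CUDA kernel depend on the Device template parameter):

// Serial sketch only; not how TNL actually executes the loop.
for( int j = 0; j < ySize; j++ )
   for( int i = 0; i < xSize; i++ )
      init( i, j, xSize, c );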
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void initMeshFunction( const int xSize,
const int ySize,
const int zSize,
Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, int j, int k, const int xSize, const int ySize, const double c ) mutable {
view[ ( k * ySize + j ) * xSize + i ] = c; };
ParallelFor3D< Device >::exec( 0, 0, 0, xSize, ySize, zSize, init, xSize, ySize, c );
}
int main( int argc, char* argv[] )
{
/***
* Define the dimensions of the 3D mesh function.
*/
const int xSize( 10 ), ySize( 10 ), zSize( 10 );
const int size = xSize * ySize * zSize;
/***
* First, test the mesh function initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( size );
initMeshFunction( xSize, ySize, zSize, host_v, 1.0 );
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( size );
initMeshFunction( xSize, ySize, zSize, cuda_v, 1.0 );
#endif
return EXIT_SUCCESS;
}
ParallelForExample-3D.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
/****
* Set all elements of the vector v to the constant c.
*/
template< typename Device >
void initVector( Vector< double, Device >& v,
const double& c )
{
auto view = v.getView();
auto init = [=] __cuda_callable__ ( int i, const double c ) mutable {
view[ i ] = c; };
Algorithms::ParallelFor< Device >::exec( 0, v.getSize(), init, c );
}
int main( int argc, char* argv[] )
{
/***
* First, test the vector initialization on the CPU.
*/
Vector< double, Devices::Host > host_v( 10 );
initVector( host_v, 1.0 );
std::cout << "host_v = " << host_v << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v( 10 );
initVector( cuda_v, 1.0 );
std::cout << "cuda_v = " << cuda_v << std::endl;
#endif
return EXIT_SUCCESS;
}
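On Devices::Host, the ParallelFor call in the example above boils down to a loop over the index range, parallelized with OpenMP when TNL is built with OpenMP support. A rough host-side sketch (assuming OpenMP is enabled; otherwise it is a plain serial loop):

// Host-side sketch of ParallelFor< Devices::Host >::exec( 0, v.getSize(), init, c ).
const int n = v.getSize();
#pragma omp parallel for
for( int i = 0; i < n; i++ )
   init( i, c );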
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/ParallelFor.h>
using namespace TNL;
using namespace TNL::Containers;
using namespace TNL::Algorithms;
template< typename Device >
void vectorSum( const Vector< double, Device >& v1,
const Vector< double, Device >& v2,
const double& c,
Vector< double, Device >& result )
{
/****
* Get vector views which can be captured by the lambda.
*/
auto v1_view = v1.getConstView();
auto v2_view = v2.getConstView();
auto result_view = result.getView();
/****
* The sum function.
*/
auto sum = [=] __cuda_callable__ ( int i, const double c ) mutable {
result_view[ i ] = v1_view[ i ] + v2_view[ i ] + c; };
ParallelFor< Device >::exec( 0, v1.getSize(), sum, c );
}
int main( int argc, char* argv[] )
{
/***
* First, test the vector sum on the CPU.
*/
Vector< double, Devices::Host > host_v1( 10 ), host_v2( 10 ), host_result( 10 );
host_v1 = 1.0;
host_v2.evaluate( []__cuda_callable__ ( int i )->double { return i; } );
vectorSum( host_v1, host_v2, 2.0, host_result );
std::cout << "host_v1 = " << host_v1 << std::endl;
std::cout << "host_v2 = " << host_v2 << std::endl;
std::cout << "The sum of the vectors on CPU is " << host_result << "." << std::endl;
/***
* And then also on GPU.
*/
#ifdef HAVE_CUDA
Vector< double, Devices::Cuda > cuda_v1( 10 ), cuda_v2( 10 ), cuda_result( 10 );
cuda_v1 = 1.0;
cuda_v2.evaluate( []__cuda_callable__ ( int i )->double { return i; } );
vectorSum( cuda_v1, cuda_v2, 2.0, cuda_result );
std::cout << "cuda_v1 = " << cuda_v1 << std::endl;
std::cout << "cuda_v2 = " << cuda_v2 << std::endl;
std::cout << "The sum of the vectors on GPU is " << cuda_result << "." << std::endl;
#endif
return EXIT_SUCCESS;
}
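Note that the constant c does not have to be forwarded through exec as an extra argument; it can just as well be captured by the lambda itself. A minimal sketch of this alternative, assuming exec also accepts a lambda that takes only the index:

// Variant capturing c by value instead of forwarding it through exec.
auto sum = [=] __cuda_callable__ ( int i ) mutable {
   result_view[ i ] = v1_view[ i ] + v2_view[ i ] + c; };
ParallelFor< Device >::exec( 0, v1.getSize(), sum );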
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/StaticVector.h>
#include <TNL/Algorithms/StaticFor.h>
using namespace TNL;
using namespace TNL::Containers;
int main( int argc, char* argv[] )
{
/****
* Create two static vectors
*/
const int Size( 3 );
StaticVector< Size, double > a, b;
a = 1.0;
b = 2.0;
double sum( 0.0 );
/****
* Add a constant to the vector b, store the result in a and accumulate the sum of a's elements.
*/
auto addition = [&]( int i, const double& c ) { a[ i ] = b[ i ] + c; sum += a[ i ]; };
Algorithms::StaticFor< 0, Size >::exec( addition, 3.14 );
std::cout << "a = " << a << std::endl;
std::cout << "sum = " << sum << std::endl;
}
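StaticFor unrolls the loop at compile time, so the call above is roughly equivalent to the following sequence of calls (a sketch of the semantics only):

addition( 0, 3.14 );
addition( 1, 3.14 );
addition( 2, 3.14 );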
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/StaticVector.h>
#include <TNL/Algorithms/TemplateStaticFor.h>
using namespace TNL;
using namespace TNL::Containers;
const int Size( 5 );
template< int I >
struct LoopBody
{
static void exec( const StaticVector< Size, double >& v ) {
std::cout << "v[ " << I << " ] = " << v[ I ] << std::endl;
}
};
int main( int argc, char* argv[] )
{
/****
* Initialize the static vector.
*/
StaticVector< Size, double > v{ 1.0, 2.0, 3.0, 4.0, 5.0 };
/****
* Print out the vector using template parameters for indexing.
*/
Algorithms::TemplateStaticFor< 0, Size, LoopBody >::exec( v );
}
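Unlike StaticFor, here the loop index is a genuine template parameter, so TemplateStaticFor instantiates LoopBody once per index. The call above roughly expands to (a sketch of the semantics only):

LoopBody< 0 >::exec( v );
LoopBody< 1 >::exec( v );
LoopBody< 2 >::exec( v );
LoopBody< 3 >::exec( v );
LoopBody< 4 >::exec( v );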
ADD_SUBDIRECTORY( Algorithms )
ADD_SUBDIRECTORY( Containers )
ADD_SUBDIRECTORY( Pointers )
ADD_EXECUTABLE( FileExample FileExample.cpp )
ADD_CUSTOM_COMMAND( COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out OUTPUT FileExample.out )
@@ -7,16 +7,15 @@ using namespace std;
 int main()
 {
-   Containers::Vector<int> vector1;
-   vector1.setSize(5);
-   vector1.setValue(0);
-   cout << "Does vector contain 1?" << vector1.containsValue(1) << endl;
-   cout << "Does vector contain only zeros?" << vector1.containsOnlyValue(0) << endl;
+   Containers::Vector<int> vector1( 5 );
+   vector1 = 0;
+   cout << "Does vector contain 1?" << vector1.containsValue( 1 ) << endl;
+   cout << "Does vector contain only zeros?" << vector1.containsOnlyValue( 0 ) << endl;
-   Containers::Vector<int> vector2(3);
-   vector2.setValue(1);
-   vector2.swap(vector1);
-   vector2.setElement(2,4);
+   Containers::Vector<int> vector2( 3 );
+   vector2 = 1;
+   vector2.swap( vector1 );
+   vector2.setElement( 2, 4 );
    cout << "First vector:" << vector1.getData() << endl;
    cout << "Second vector:" << vector2.getData() << endl;
@@ -24,10 +23,11 @@ int main()
    vector2.reset();
    cout << "Second vector after reset:" << vector2.getData() << endl;
-   /*Containers::Vector<int> vect = {1, 2, -3, 3};
-   cout << "The smallest element is:" << vect.min() << endl;
-   cout << "The absolute biggest element is:" << vect.absMax() << endl;
-   cout << "Sum of all vector elements:" << vect.sum() << endl;
-   vect.scalarMultiplication(2);*/
+   Containers::Vector<int> vect = { 1, 2, -3, 3 };
+   cout << "The smallest element is:" << min( vect ) << endl;
+   cout << "The absolute biggest element is:" << max( abs( vect ) ) << endl;
+   cout << "Sum of all vector elements:" << sum( vect ) << endl;
+   vect *= 2.0;
+   cout << "Vector multiplied by 2:" << vect << endl;
 }
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(UniquePointerExampleCuda UniquePointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND UniquePointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/UniquePointerExample.out OUTPUT UniquePointerExample.out )
CUDA_ADD_EXECUTABLE(SharedPointerExampleCuda SharedPointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND SharedPointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SharedPointerExample.out OUTPUT SharedPointerExample.out )
CUDA_ADD_EXECUTABLE(DevicePointerExampleCuda DevicePointerExample.cu)
ADD_CUSTOM_COMMAND( COMMAND DevicePointerExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DevicePointerExample.out OUTPUT DevicePointerExample.out )
ADD_CUSTOM_TARGET( RunPointersExamples ALL DEPENDS
UniquePointerExample.out
SharedPointerExample.out
DevicePointerExample.out
)
ENDIF()
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/DevicePointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
struct Tuple
{
Tuple( ArrayCuda& _a1, ArrayCuda& _a2 ):
a1( _a1 ), a2( _a2 ){};
Pointers::DevicePointer< ArrayCuda > a1, a2;
};
#ifdef HAVE_CUDA
__global__ void printTuple( const Tuple t )
{
printf( "Tuple size is: %d\n", t.a1->getSize() );
for( int i = 0; i < t.a1->getSize(); i++ )
{
printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] );
printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] );
}
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create a tuple of arrays and print them in a CUDA kernel.
*/
#ifdef HAVE_CUDA
ArrayCuda a1( 3 ), a2( 3 );
Tuple t( a1, a2 );
a1 = 1;
a2 = 2;
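// The smart pointers keep an image of the arrays on the device; after the
// host-side modifications above, the images have to be synchronized before
// the kernel launch.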
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
/***
* Resize the arrays
*/
a1.setSize( 5 );
a2.setSize( 5 );
a1 = 3;
a2 = 4;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
#endif
return EXIT_SUCCESS;
}
DevicePointerExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/SharedPointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
struct Tuple
{
Tuple( const int size ):
a1( size ), a2( size ){};
void setSize( const int size )
{
a1->setSize( size );
a2->setSize( size );
}
Pointers::SharedPointer< ArrayCuda > a1, a2;
};
#ifdef HAVE_CUDA
__global__ void printTuple( const Tuple t )
{
printf( "Tuple size is: %d\n", t.a1->getSize() );
for( int i = 0; i < t.a1->getSize(); i++ )
{
printf( "a1[ %d ] = %d \n", i, ( *t.a1 )[ i ] );
printf( "a2[ %d ] = %d \n", i, ( *t.a2 )[ i ] );
}
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create a tuple of arrays and print them in a CUDA kernel.
*/
#ifdef HAVE_CUDA
Tuple t( 3 );
*t.a1 = 1;
*t.a2 = 2;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
/***
* Resize the arrays
*/
t.setSize( 5 );
*t.a1 = 3;
*t.a2 = 4;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printTuple<<< 1, 1 >>>( t );
#endif
return EXIT_SUCCESS;
}
SharedPointerExample.cpp
\ No newline at end of file
#include <iostream>
#include <cstdlib>
#include <TNL/Containers/Array.h>
#include <TNL/Pointers/UniquePointer.h>
using namespace TNL;
using ArrayCuda = Containers::Array< int, Devices::Cuda >;
#ifdef HAVE_CUDA
__global__ void printArray( const ArrayCuda* ptr )
{
printf( "Array size is: %d\n", ptr->getSize() );
for( int i = 0; i < ptr->getSize(); i++ )
printf( "a[ %d ] = %d \n", i, ( *ptr )[ i ] );
}
#endif
int main( int argc, char* argv[] )
{
/***
* Create an array and print its elements in a CUDA kernel.
*/
#ifdef HAVE_CUDA
Pointers::UniquePointer< ArrayCuda > array_ptr( 10 );
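// modifyData< Devices::Host >() gives writable access to the host-side image
// of the array, while getData< Devices::Cuda >() below returns the image that
// lives on the device and can be passed to the kernel.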
array_ptr.modifyData< Devices::Host >() = 1;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printArray<<< 1, 1 >>>( &array_ptr.getData< Devices::Cuda >() );
/***
* Resize the array and print it again
*/
array_ptr.modifyData< Devices::Host >().setSize( 5 );
array_ptr.modifyData< Devices::Host >() = 2;
std::cout << array_ptr.modifyData< Devices::Host >().getSize() << std::endl;
Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >();
printArray<<< 1, 1 >>>( &array_ptr.getData< Devices::Cuda >() );
#endif
return EXIT_SUCCESS;
}
UniquePointerExample.cpp
\ No newline at end of file
-\page tutorial_01_arrays Arrays tutorial
+\page tutorial_Arrays Arrays tutorial
## Introduction