Commit 3bbf860d authored by Tomáš Oberhuber

Added documentation for Segments namespace.

parent c2ee4999
1 merge request: !105 TO/matrices-adaptive-csr
ADD_SUBDIRECTORY( Segments )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( SortingExampleCuda SortingExample.cu)
ADD_CUSTOM_COMMAND( COMMAND SortingExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/SortingExample.out OUTPUT SortingExample.out )
......
File added
set( COMMON_EXAMPLES
SegmentsExample_General
)
if( BUILD_CUDA )
foreach( target IN ITEMS ${COMMON_EXAMPLES} )
cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS )
add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out )
endforeach()
else()
foreach( target IN ITEMS ${COMMON_EXAMPLES} )
add_executable( ${target} ${target}.cpp )
add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out )
set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out )
endforeach()
endif()
IF( BUILD_CUDA )
ADD_CUSTOM_TARGET( RunSegmentsExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} )
ELSE()
ADD_CUSTOM_TARGET( RunSegmentsExamples ALL DEPENDS ${HOST_OUTPUTS} )
ENDIF()
\ No newline at end of file
#include <iostream>
#include <functional>
#include <TNL/Containers/Array.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Algorithms/Segments/CSR.h>
#include <TNL/Algorithms/Segments/Ellpack.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
template< typename Segments >
void SegmentsExample()
{
   using DeviceType = typename Segments::DeviceType;
   using IndexType = typename Segments::IndexType;

   /***
    * Create segments with given segments sizes.
    */
   Segments segments{ 1, 2, 3, 4, 5 };
   std::cout << "Segments sizes are: " << segments << std::endl;

   /***
    * Allocate array for the segments.
    */
   TNL::Containers::Array< double, DeviceType > data( segments.getStorageSize() );
   data = 0.0;

   /***
    * Insert data into particular segments.
    */
   auto data_view = data.getView();
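   // The lambda receives the segment index, the element's local index within its segment,
   // its global index in the underlying array and a `compute` flag (not used in this example).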
   segments.forAllElements( [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable {
      if( localIdx <= segmentIdx )
         data_view[ globalIdx ] = segmentIdx;
   } );

   /***
    * Print the data managed by the segments.
    */
   auto fetch = [=] __cuda_callable__ ( IndexType globalIdx ) -> double { return data_view[ globalIdx ]; };
   printSegments( segments, fetch, std::cout );

   /***
    * Compute sums of elements in particular segments.
    */
   TNL::Containers::Vector< double, DeviceType, IndexType > sums( segments.getSegmentsCount() );
   auto sums_view = sums.getView();
   auto sum_fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> double {
      return data_view[ globalIdx ];
   };
   auto keep = [=] __cuda_callable__ ( const IndexType& segmentIdx, const double& value ) mutable {
      sums_view[ segmentIdx ] = value;
   };
   segments.reduceAllSegments( sum_fetch, std::plus<>{}, keep, 0.0 );
   std::cout << "The sums are: " << sums << std::endl;
}
int main( int argc, char* argv[] )
{
   using HostCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Host, int >;
   using HostEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Host, int >;
   using CudaCSR = TNL::Algorithms::Segments::CSR< TNL::Devices::Cuda, int >;
   using CudaEllpack = TNL::Algorithms::Segments::Ellpack< TNL::Devices::Cuda, int >;

   std::cout << "Example of CSR segments on host: " << std::endl;
   SegmentsExample< HostCSR >();

   std::cout << "Example of Ellpack segments on host: " << std::endl;
   SegmentsExample< HostEllpack >();

#ifdef HAVE_CUDA
   std::cout << "Example of CSR segments on CUDA GPU: " << std::endl;
   SegmentsExample< CudaCSR >();

   std::cout << "Example of Ellpack segments on CUDA GPU: " << std::endl;
   SegmentsExample< CudaEllpack >();
#endif
   return EXIT_SUCCESS;
}
SegmentsExample_General.cpp
\ No newline at end of file
/***************************************************************************
                          _NamespaceDoxy.h  -  description
                             -------------------
    begin                : Apr 1, 2021
    copyright            : (C) 2021 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */
#pragma once
namespace TNL {
namespace Algorithms {
/**
* \brief Namespace holding segments data structures.
*Segments* represent a data structure for the manipulation of several local arrays (also denoted as segments),
which in general have different sizes. All the local arrays are supposed to be allocated within one contiguous global array.
The segments data structure offers a mapping between the indexes of particular local arrays and the indexes
of the global array. In addition, one can perform parallel operations, like parallel for or flexible reduction, on particular
local arrays.
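The following minimal sketch (based on the complete example included at the end of this page) creates CSR segments from the sizes of the particular local arrays and queries the number of segments and the size of the global array needed to store them:
\code{.cpp}
#include <iostream>
#include <TNL/Algorithms/Segments/CSR.h>
#include <TNL/Devices/Host.h>

int main()
{
   // Five segments with sizes 1, 2, 3, 4 and 5.
   TNL::Algorithms::Segments::CSR< TNL::Devices::Host, int > segments{ 1, 2, 3, 4, 5 };

   // Number of segments and size of the global array needed to store all of them.
   std::cout << "Segments count: " << segments.getSegmentsCount() << std::endl;
   std::cout << "Storage size:   " << segments.getStorageSize() << std::endl;
}
\endcode
For CSR, the storage size is simply the sum of the segment sizes (here 15); other formats, such as Ellpack, may allocate additional padding elements.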
A typical example of the use of *segments* is the implementation of sparse matrices. A sparse matrix like the following
\f[
\left(
\begin{array}{ccccc}
1 & 0 & 2 & 0 & 0 \\
0 & 0 & 5 & 0 & 0 \\
3 & 4 & 7 & 9 & 0 \\
0 & 0 & 0 & 0 & 12 \\
0 & 0 & 15 & 17 & 20
\end{array}
\right)
\f]
is usually compressed first, which means that the zero elements are omitted, resulting in the following "matrix":
\f[
\begin{array}{ccccc}
1 & 2 \\
5 \\
3 & 4 & 7 & 9 \\
12 \\
15 & 17 & 20
\end{array}
\f]
We also have to store the column index of each matrix element in a "matrix" like this:
\f[
\begin{array}{ccccc}
0 & 2 \\
2 \\
0 & 1 & 2 & 3 \\
4 \\
2 & 3 & 4
\end{array}
\f]
Such "matrices" can be stored in memory in a row-wise manner in one contiguous array because of the performance reasons. The first "matrix" (i.e. values of the matrix elements)
would be stored as follows
\f[
\begin{array}{|cc|c|cccc|c|cc|} 1 & 2 & 5 & 3 & 4 & 7 & 9 & 12 & 15 & 17 & 20 \end{array}
\f]
and the second one (i.e. column indexes of the matrix values) as follows
\f[
\begin{array}{|cc|c|cccc|c|cc|} 0 & 2 & 2 & 0 & 1 & 2 & 3 & 4 & 2 & 3 & 4 \end{array}
\f]
What we see above is the so-called [CSR sparse matrix format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)).
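To make this layout concrete, the following standalone sketch (plain C++, not using TNL) stores the two arrays above together with row offsets that delimit the particular rows; the offsets 0, 2, 3, 7, 8, 11 follow directly from the row sizes 2, 1, 4, 1 and 3:
\code{.cpp}
#include <iostream>
#include <vector>

int main()
{
   // Values and column indexes of the nonzero elements, stored row by row (CSR).
   std::vector< double > values        { 1, 2,  5,  3, 4, 7, 9,  12,  15, 17, 20 };
   std::vector< int >    columnIndexes { 0, 2,  2,  0, 1, 2, 3,   4,   2,  3,  4 };

   // rowOffsets[ i ] is the position of the first element of the i-th row (segment);
   // the i-th row occupies the range [ rowOffsets[ i ], rowOffsets[ i + 1 ] ).
   std::vector< int > rowOffsets{ 0, 2, 3, 7, 8, 11 };

   for( std::size_t row = 0; row < rowOffsets.size() - 1; row++ ) {
      std::cout << "Row " << row << ": ";
      for( int i = rowOffsets[ row ]; i < rowOffsets[ row + 1 ]; i++ )
         std::cout << values[ i ] << " (column " << columnIndexes[ i ] << ")  ";
      std::cout << std::endl;
   }
}
\endcode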
CSR is the most popular format for the storage of sparse matrices and it is designed for high performance. However, it may not be the most efficient format for storing
sparse matrices on GPUs. Therefore, many other formats have been developed to achieve better performance. These formats often use a different layout
of the matrix elements in memory. They especially have to deal with two difficulties:
1. Efficient storage of the matrix elements in memory, to fulfill the requirements of coalesced memory accesses on GPUs or good spatial locality
for efficient use of caches on CPUs.
2. Efficient mapping of GPU threads to different matrix rows.
Working with this kind of data structure is not limited to sparse matrices. We could name at least a few other applications:
1. Efficient storage of [graphs](https://en.wikipedia.org/wiki/Graph_(discrete_mathematics)) - one segment represents one graph node,
the elements in one segment are the indexes of its neighbors.
2. [Unstructured numerical meshes](https://en.wikipedia.org/wiki/Types_of_mesh) - an unstructured numerical mesh is, in fact, a graph.
3. [Particle-in-cell method](https://en.wikipedia.org/wiki/Particle-in-cell) - one segment represents one cell, the elements in one segment
are the indexes of the particles in that cell.
4. [K-means clustering](https://en.wikipedia.org/wiki/K-means_clustering) - each segment represents one cluster, the elements represent the vectors
belonging to the given cluster.
5. [Hashing](https://arxiv.org/abs/1907.02900) - segments are particular rows of the hash table, the elements in a segment correspond to colliding
hashed elements.
In general, segments can be used for problems that somehow correspond to a 2D data structure where each row can have a different size and we need
to perform miscellaneous operations within the rows. The name *segments* comes from segmented parallel reduction or
[segmented scan (prefix-sum)](https://en.wikipedia.org/wiki/Segmented_scan).
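As an illustration, the following serial sketch performs a segmented reduction (a per-segment sum) over the CSR arrays shown above; in TNL the same operation is performed in parallel by methods such as `reduceAllSegments`, as the example below demonstrates:
\code{.cpp}
#include <iostream>
#include <vector>

int main()
{
   std::vector< double > values    { 1, 2,  5,  3, 4, 7, 9,  12,  15, 17, 20 };
   std::vector< int >    rowOffsets{ 0, 2, 3, 7, 8, 11 };

   // Reduce (sum) each segment separately: this is the essence of segmented reduction.
   for( std::size_t segment = 0; segment < rowOffsets.size() - 1; segment++ ) {
      double sum = 0.0;
      for( int i = rowOffsets[ segment ]; i < rowOffsets[ segment + 1 ]; i++ )
         sum += values[ i ];
      std::cout << "Sum of segment " << segment << " is " << sum << std::endl;
   }
}
\endcode
For the arrays above, this prints the sums 3, 5, 23, 12 and 52. The parallel versions differ mainly in how threads are mapped to segments, which is exactly where the different segment formats (CSR, Ellpack, ...) come into play.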
The following example demonstrates the essence of *segments* in TNL:
\includelineno Algorithms/Segments/SegmentsExample_General.cpp
The result looks as follows:
\include SegmentsExample_General.out
*/
namespace Segments {
} // namespace Segments
} // namespace Algorithms
} // namespace TNL