Loading src/Benchmarks/Traversers/BenchmarkTraverserUserData.h +3 −1 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <TNL/Pointers/SharedPointer.h> namespace TNL { namespace Benchmarks { namespace Traversers { Loading src/Benchmarks/Traversers/CMakeLists.txt +6 −7 Original line number Diff line number Diff line # TODO: Split the benchmark into several files for faster build #if( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu ) #else() # ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp ) #endif() #install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu ) else() ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp ) endif() install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin ) src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h +0 −5 Original line number Diff line number Diff line Loading @@ -12,10 +12,6 @@ #pragma once #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" #include "SimpleCell.h" namespace TNL { namespace Benchmarks { namespace Traversers { Loading @@ -23,7 +19,6 @@ namespace TNL { template< typename Grid > class GridTraverserBenchmarkHelper{}; } // namespace Traversers } // namespace Benchmarks } // namespace TNL Loading src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h +12 −9 Original line number Diff line number Diff line Loading @@ -12,12 +12,15 @@ #pragma once #include <TNL/Functions/MeshFunctionView.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Meshes/Traverser.h> #include "GridTraverserBenchmarkHelper.h" #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" #include "SimpleCell.h" namespace TNL { namespace Benchmarks { namespace Traversers { Loading @@ -42,8 +45,8 @@ _GridTraverser1D( //typename GridType::CoordinatesType coordinates; GridEntity entity( *grid ); entity.getCoordinates().x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; //coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; //coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( entity.getCoordinates() <= end ) { entity.refresh(); Loading @@ -69,7 +72,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Host, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading Loading @@ -108,7 +111,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading @@ -122,7 +125,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index { #ifdef HAVE_CUDA dim3 blockSize( 256 ), blocksCount, gridsCount; Devices::Cuda::setupThreads( Cuda::setupThreads( blockSize, blocksCount, gridsCount, Loading @@ -131,7 +134,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ ) { dim3 gridSize; Devices::Cuda::setupGrid( Cuda::setupGrid( blocksCount, gridsCount, gridIdx, Loading src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h +10 −6 Original line number Diff line number Diff line Loading @@ -12,6 +12,10 @@ #pragma once #include <TNL/Functions/MeshFunctionView.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Meshes/Traverser.h> #include "GridTraverserBenchmarkHelper.h" #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" Loading Loading @@ -40,8 +44,8 @@ _GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; GridEntity entity( *grid ); entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( entity.getCoordinates() <= end ) { entity.refresh(); Loading @@ -62,7 +66,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Host, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading Loading @@ -104,7 +108,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading @@ -118,7 +122,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index { #ifdef HAVE_CUDA dim3 blockSize( 16, 16 ), blocksCount, gridsCount; Devices::Cuda::setupThreads( Cuda::setupThreads( blockSize, blocksCount, gridsCount, Loading @@ -129,7 +133,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ ) { dim3 gridSize; Devices::Cuda::setupGrid( Cuda::setupGrid( blocksCount, gridsCount, gridIdx, Loading Loading
src/Benchmarks/Traversers/BenchmarkTraverserUserData.h +3 −1 Original line number Diff line number Diff line Loading @@ -12,6 +12,8 @@ #pragma once #include <TNL/Pointers/SharedPointer.h> namespace TNL { namespace Benchmarks { namespace Traversers { Loading
src/Benchmarks/Traversers/CMakeLists.txt +6 −7 Original line number Diff line number Diff line # TODO: Split the benchmark into several files for faster build #if( BUILD_CUDA ) # CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu ) #else() # ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp ) #endif() #install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cu ) else() ADD_EXECUTABLE( tnl-benchmark-traversers tnl-benchmark-traversers.cpp ) endif() install( TARGETS tnl-benchmark-traversers RUNTIME DESTINATION bin )
src/Benchmarks/Traversers/GridTraverserBenchmarkHelper.h +0 −5 Original line number Diff line number Diff line Loading @@ -12,10 +12,6 @@ #pragma once #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" #include "SimpleCell.h" namespace TNL { namespace Benchmarks { namespace Traversers { Loading @@ -23,7 +19,6 @@ namespace TNL { template< typename Grid > class GridTraverserBenchmarkHelper{}; } // namespace Traversers } // namespace Benchmarks } // namespace TNL Loading
src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_1D.h +12 −9 Original line number Diff line number Diff line Loading @@ -12,12 +12,15 @@ #pragma once #include <TNL/Functions/MeshFunctionView.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Meshes/Traverser.h> #include "GridTraverserBenchmarkHelper.h" #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" #include "SimpleCell.h" namespace TNL { namespace Benchmarks { namespace Traversers { Loading @@ -42,8 +45,8 @@ _GridTraverser1D( //typename GridType::CoordinatesType coordinates; GridEntity entity( *grid ); entity.getCoordinates().x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; //coordinates.x() = begin.x() + ( gridIdx * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; //coordinates.x() = begin.x() + ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( entity.getCoordinates() <= end ) { entity.refresh(); Loading @@ -69,7 +72,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Host, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading Loading @@ -108,7 +111,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading @@ -122,7 +125,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index { #ifdef HAVE_CUDA dim3 blockSize( 256 ), blocksCount, gridsCount; Devices::Cuda::setupThreads( Cuda::setupThreads( blockSize, blocksCount, gridsCount, Loading @@ -131,7 +134,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 1, Real, Devices::Cuda, Index for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ ) { dim3 gridSize; Devices::Cuda::setupGrid( Cuda::setupGrid( blocksCount, gridsCount, gridIdx, Loading
src/Benchmarks/Traversers/GridTraverserBenchmarkHelper_2D.h +10 −6 Original line number Diff line number Diff line Loading @@ -12,6 +12,10 @@ #pragma once #include <TNL/Functions/MeshFunctionView.h> #include <TNL/Pointers/SharedPointer.h> #include <TNL/Meshes/Traverser.h> #include "GridTraverserBenchmarkHelper.h" #include "AddOneEntitiesProcessor.h" #include "BenchmarkTraverserUserData.h" Loading Loading @@ -40,8 +44,8 @@ _GridTraverser2D( typedef Meshes::Grid< 2, Real, Devices::Cuda, Index > GridType; GridEntity entity( *grid ); entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Devices::Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Devices::Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; entity.getCoordinates().x() = begin.x() + ( gridIdx.x * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; entity.getCoordinates().y() = begin.y() + ( gridIdx.y * Cuda::getMaxGridSize() + blockIdx.y ) * blockDim.y + threadIdx.y; if( entity.getCoordinates() <= end ) { entity.refresh(); Loading @@ -62,7 +66,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Host, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading Loading @@ -104,7 +108,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index using RealType = typename GridType::RealType; using IndexType = typename GridType::IndexType; using CoordinatesType = typename GridType::CoordinatesType; using MeshFunction = Functions::MeshFunction< GridType >; using MeshFunction = Functions::MeshFunctionView< GridType >; using MeshFunctionPointer = Pointers::SharedPointer< MeshFunction >; using CellType = typename GridType::template EntityType< Dimension, Meshes::GridEntityNoStencilStorage >; using SimpleCellType = SimpleCell< GridType >; Loading @@ -118,7 +122,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index { #ifdef HAVE_CUDA dim3 blockSize( 16, 16 ), blocksCount, gridsCount; Devices::Cuda::setupThreads( Cuda::setupThreads( blockSize, blocksCount, gridsCount, Loading @@ -129,7 +133,7 @@ class GridTraverserBenchmarkHelper< Meshes::Grid< 2, Real, Devices::Cuda, Index for( gridIdx.x = 0; gridIdx.x < gridsCount.x; gridIdx.x++ ) { dim3 gridSize; Devices::Cuda::setupGrid( Cuda::setupGrid( blocksCount, gridsCount, gridIdx, Loading