Loading src/Benchmarks/Convolution/CMakeLists.txt +5 −0 Original line number Diff line number Diff line Loading @@ -24,3 +24,8 @@ endfunction() GENERATE_CUDA_EXECUTABLE("Convolution" 1 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 2 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 3 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 1 "templates/main_benchmark.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 2 "templates/main_benchmark.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 3 "templates/main_benchmark.h" "kernels/naive.h") src/Benchmarks/Convolution/support/Benchmark.h +5 −6 Original line number Diff line number Diff line Loading @@ -14,10 +14,10 @@ template< int Dimension, typename Device > class Benchmark { public: using Benchmark = typename TNL::Benchmarks::Benchmark<>; using TNLBenchmark = typename TNL::Benchmarks::Benchmark<>; void runBenchmark( const TNL::Config::ParameterContainer& parameters ) const run( const TNL::Config::ParameterContainer& parameters ) const { if( ! TNL::Devices::Host::setup( parameters ) || ! TNL::Devices::Cuda::setup( parameters ) ) return; Loading @@ -36,7 +36,7 @@ public: std::ofstream logFile( logFileName.getString(), mode ); Benchmark benchmark( logFile, loops, verbose ); TNLBenchmark benchmark( logFile, loops, verbose ); std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); Loading @@ -44,8 +44,8 @@ public: start(benchmark, parameters); } virtual void start(const Benchmark& benchmark, const TNL::Config::ParameterContainer& parameters) const { TNL_ASSERT_TRUE(false, << "Should be overriden"); virtual void start( TNLBenchmark& benchmark, const TNL::Config::ParameterContainer& parameters) const { TNL_ASSERT_TRUE(false, "Should be overriden"); } virtual TNL::Config::ConfigDescription makeInputConfig() const { Loading @@ -69,7 +69,6 @@ public: config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addDelimiter( "Device settings:" ); TNL::Devices::Host::configSetup( config ); Loading src/Benchmarks/Convolution/support/DummyBenchmark.h +19 −18 Original line number Diff line number Diff line Loading @@ -16,12 +16,12 @@ class DummyBenchmark : public Benchmark< Dimension, Device > { public: using Vector = TNL::Containers::StaticVector< Dimension, int >; using DataStore = TNL::Containers::Array< int, Device, float >; using Benchmark = Base::Benchmark; using DataStore = TNL::Containers::Array< float, Device, int >; using Base = Benchmark< Dimension, Device >; using TNLBenchmark = typename Base::TNLBenchmark; virtual void start( const Benchmark& benchmark, const TNL::Config::ParameterContainer& parameters ) const override start( TNLBenchmark& benchmark, const TNL::Config::ParameterContainer& parameters ) const override { Vector start; Vector end; Loading Loading @@ -53,7 +53,7 @@ public: } virtual void time( Benchmark& bencmark, time( TNLBenchmark& benchmark, const Vector& minDimension, const Vector& maxDimension, const int dimensionStep, Loading @@ -68,14 +68,14 @@ public: currentKernelSize = minKernelSize; do { time( benchmark, currentDimension, currentKernelSize ); timeConvolution( benchmark, currentDimension, currentKernelSize ); currentKernelSize[ 0 ] += kernelStep; for( size_t i = 0; i < currentKernelSize.getSize() - 1; i++ ) { if( currentKernelSize[ i ] >= maxKernelSize[ i ] ) { currentKernelSize[ i ] = minKernelSize[ i ]; maxKernelSize[ i + 1 ] += kernelStep; currentKernelSize[ i + 1 ] += kernelStep; } } } while( currentKernelSize < maxKernelSize ); Loading @@ -85,7 +85,7 @@ public: for( size_t i = 0; i < currentDimension.getSize() - 1; i++ ) { if( currentDimension[ i ] >= maxDimension[ i ] ) { currentDimension[ i ] = minDimension[ i ]; maxDimension[ i ] = maxDimension[ i ]; currentDimension[ i ] = maxDimension[ i ]; } } Loading @@ -93,11 +93,11 @@ public: } void timeConvolution( Benchmark& benchmark, const Vector& dimension, const Vector& kernelSize ) const timeConvolution( TNLBenchmark& benchmark, const Vector& dimension, const Vector& kernelSize ) const { auto device = TNL::getType< Device >(); Benchmark::MetadataColumns columns = {}; typename TNLBenchmark::MetadataColumns columns; size_t elementsCount = 1; size_t kernelElementsCount = 1; Loading @@ -106,18 +106,19 @@ public: elementsCount *= dimension[ i ]; kernelElementsCount *= kernelSize[ i ]; columns.insert( { dimensionIds[ i ], dimension[ i ] } ); columns.insert( { kernelSizeIds[ i ], kernelSize[ i ] } ); columns.push_back( { dimensionIds[ i ], TNL::convertToString(dimension[ i ]) } ); columns.push_back( { kernelSizeIds[ i ], TNL::convertToString(kernelSize[ i ]) } ); } benchmark.setDatasetSize( ( elementsCount * 4 ) / 1.e9, 1.0 ); benchmark.setMetadataColumns( columns ); // Setup input data DataStore input, result, kernel; input.resize( elementsCount ); result.resize( elementsCount ); kernel.resize( kernelSize ); kernel.resize( kernelElementsCount ); input = 1; result = 1; Loading @@ -129,24 +130,24 @@ public: auto measure = [ & ]() { DummyTask<Dimension, Device>::exec(dimension, kernelSize, inputView, resultView, kernelView); DummyTask<int, float, Dimension, Device>::exec(dimension, kernelSize, inputView, resultView, kernelView); }; benchmark.time< Device >( device, measure ); benchmark.template time<Device>( device, measure ); } TNL::Config::ConfigDescription makeInputConfig() const override { auto config = Base::makeInputConfig(); TNL::Config::ConfigDescription config = Base::makeInputConfig(); config.addDelimiter( "Grid dimension settings:" ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( minDimensionIds[ i ], minDimensionIds[ i ], 512 ); config.addEntry< int >( minDimensionIds[ i ], minDimensionIds[ i ], 16 ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( maxDimensionIds[ i ], maxDimensionIds[ i ], 512 ); config.addEntry< int >( maxDimensionIds[ i ], maxDimensionIds[ i ], 128 ); config.addEntry< int >( "dimension-step", "Step of kernel increase by which dimension is multiplied (must be even)", 2 ); Loading @@ -156,7 +157,7 @@ public: config.addEntry< int >( minKernelSizeIds[ i ], minKernelSizeIds[ i ] + " (odd) :", 1 ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( minKernelSizeIds[ i ], minKernelSizeIds[ i ] + " (odd) :", 11 ); config.addEntry< int >( maxKernelSizeIds[ i ], maxKernelSizeIds[ i ] + " (odd) :", 11 ); config.addEntry< int >( "kernel-step", "Step of kernel increase which is added to kernel (must be even)", 2 ); Loading src/Benchmarks/Convolution/templates/main_benchmark.h +25 −0 Original line number Diff line number Diff line #include "../kernels/naive.h" #include "../support/DummyBenchmark.h" #include <TNL/Config/parseCommandLine.h> #define DIMENSION DIMENSION_VALUE using TaskBenchmark = DummyBenchmark< DIMENSION, TNL::Devices::Cuda >; int main(int argc, char* argv[]) { TaskBenchmark benchmark; auto config = benchmark.makeInputConfig(); TNL::Config::ParameterContainer parameters; if( ! parseCommandLine( argc, argv, config, parameters ) ) return EXIT_FAILURE; benchmark.run( parameters ); return 0; } Loading
src/Benchmarks/Convolution/CMakeLists.txt +5 −0 Original line number Diff line number Diff line Loading @@ -24,3 +24,8 @@ endfunction() GENERATE_CUDA_EXECUTABLE("Convolution" 1 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 2 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 3 "templates/main_solver.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 1 "templates/main_benchmark.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 2 "templates/main_benchmark.h" "kernels/naive.h") GENERATE_CUDA_EXECUTABLE("Convolution" 3 "templates/main_benchmark.h" "kernels/naive.h")
src/Benchmarks/Convolution/support/Benchmark.h +5 −6 Original line number Diff line number Diff line Loading @@ -14,10 +14,10 @@ template< int Dimension, typename Device > class Benchmark { public: using Benchmark = typename TNL::Benchmarks::Benchmark<>; using TNLBenchmark = typename TNL::Benchmarks::Benchmark<>; void runBenchmark( const TNL::Config::ParameterContainer& parameters ) const run( const TNL::Config::ParameterContainer& parameters ) const { if( ! TNL::Devices::Host::setup( parameters ) || ! TNL::Devices::Cuda::setup( parameters ) ) return; Loading @@ -36,7 +36,7 @@ public: std::ofstream logFile( logFileName.getString(), mode ); Benchmark benchmark( logFile, loops, verbose ); TNLBenchmark benchmark( logFile, loops, verbose ); std::map< std::string, std::string > metadata = TNL::Benchmarks::getHardwareMetadata(); TNL::Benchmarks::writeMapAsJson( metadata, logFileName, ".metadata.json" ); Loading @@ -44,8 +44,8 @@ public: start(benchmark, parameters); } virtual void start(const Benchmark& benchmark, const TNL::Config::ParameterContainer& parameters) const { TNL_ASSERT_TRUE(false, << "Should be overriden"); virtual void start( TNLBenchmark& benchmark, const TNL::Config::ParameterContainer& parameters) const { TNL_ASSERT_TRUE(false, "Should be overriden"); } virtual TNL::Config::ConfigDescription makeInputConfig() const { Loading @@ -69,7 +69,6 @@ public: config.addEntry< int >( "loops", "Number of iterations for every computation.", 10 ); config.addEntry< int >( "verbose", "Verbose mode.", 1 ); config.addDelimiter( "Device settings:" ); TNL::Devices::Host::configSetup( config ); Loading
src/Benchmarks/Convolution/support/DummyBenchmark.h +19 −18 Original line number Diff line number Diff line Loading @@ -16,12 +16,12 @@ class DummyBenchmark : public Benchmark< Dimension, Device > { public: using Vector = TNL::Containers::StaticVector< Dimension, int >; using DataStore = TNL::Containers::Array< int, Device, float >; using Benchmark = Base::Benchmark; using DataStore = TNL::Containers::Array< float, Device, int >; using Base = Benchmark< Dimension, Device >; using TNLBenchmark = typename Base::TNLBenchmark; virtual void start( const Benchmark& benchmark, const TNL::Config::ParameterContainer& parameters ) const override start( TNLBenchmark& benchmark, const TNL::Config::ParameterContainer& parameters ) const override { Vector start; Vector end; Loading Loading @@ -53,7 +53,7 @@ public: } virtual void time( Benchmark& bencmark, time( TNLBenchmark& benchmark, const Vector& minDimension, const Vector& maxDimension, const int dimensionStep, Loading @@ -68,14 +68,14 @@ public: currentKernelSize = minKernelSize; do { time( benchmark, currentDimension, currentKernelSize ); timeConvolution( benchmark, currentDimension, currentKernelSize ); currentKernelSize[ 0 ] += kernelStep; for( size_t i = 0; i < currentKernelSize.getSize() - 1; i++ ) { if( currentKernelSize[ i ] >= maxKernelSize[ i ] ) { currentKernelSize[ i ] = minKernelSize[ i ]; maxKernelSize[ i + 1 ] += kernelStep; currentKernelSize[ i + 1 ] += kernelStep; } } } while( currentKernelSize < maxKernelSize ); Loading @@ -85,7 +85,7 @@ public: for( size_t i = 0; i < currentDimension.getSize() - 1; i++ ) { if( currentDimension[ i ] >= maxDimension[ i ] ) { currentDimension[ i ] = minDimension[ i ]; maxDimension[ i ] = maxDimension[ i ]; currentDimension[ i ] = maxDimension[ i ]; } } Loading @@ -93,11 +93,11 @@ public: } void timeConvolution( Benchmark& benchmark, const Vector& dimension, const Vector& kernelSize ) const timeConvolution( TNLBenchmark& benchmark, const Vector& dimension, const Vector& kernelSize ) const { auto device = TNL::getType< Device >(); Benchmark::MetadataColumns columns = {}; typename TNLBenchmark::MetadataColumns columns; size_t elementsCount = 1; size_t kernelElementsCount = 1; Loading @@ -106,18 +106,19 @@ public: elementsCount *= dimension[ i ]; kernelElementsCount *= kernelSize[ i ]; columns.insert( { dimensionIds[ i ], dimension[ i ] } ); columns.insert( { kernelSizeIds[ i ], kernelSize[ i ] } ); columns.push_back( { dimensionIds[ i ], TNL::convertToString(dimension[ i ]) } ); columns.push_back( { kernelSizeIds[ i ], TNL::convertToString(kernelSize[ i ]) } ); } benchmark.setDatasetSize( ( elementsCount * 4 ) / 1.e9, 1.0 ); benchmark.setMetadataColumns( columns ); // Setup input data DataStore input, result, kernel; input.resize( elementsCount ); result.resize( elementsCount ); kernel.resize( kernelSize ); kernel.resize( kernelElementsCount ); input = 1; result = 1; Loading @@ -129,24 +130,24 @@ public: auto measure = [ & ]() { DummyTask<Dimension, Device>::exec(dimension, kernelSize, inputView, resultView, kernelView); DummyTask<int, float, Dimension, Device>::exec(dimension, kernelSize, inputView, resultView, kernelView); }; benchmark.time< Device >( device, measure ); benchmark.template time<Device>( device, measure ); } TNL::Config::ConfigDescription makeInputConfig() const override { auto config = Base::makeInputConfig(); TNL::Config::ConfigDescription config = Base::makeInputConfig(); config.addDelimiter( "Grid dimension settings:" ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( minDimensionIds[ i ], minDimensionIds[ i ], 512 ); config.addEntry< int >( minDimensionIds[ i ], minDimensionIds[ i ], 16 ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( maxDimensionIds[ i ], maxDimensionIds[ i ], 512 ); config.addEntry< int >( maxDimensionIds[ i ], maxDimensionIds[ i ], 128 ); config.addEntry< int >( "dimension-step", "Step of kernel increase by which dimension is multiplied (must be even)", 2 ); Loading @@ -156,7 +157,7 @@ public: config.addEntry< int >( minKernelSizeIds[ i ], minKernelSizeIds[ i ] + " (odd) :", 1 ); for( int i = 0; i < Dimension; i++ ) config.addEntry< int >( minKernelSizeIds[ i ], minKernelSizeIds[ i ] + " (odd) :", 11 ); config.addEntry< int >( maxKernelSizeIds[ i ], maxKernelSizeIds[ i ] + " (odd) :", 11 ); config.addEntry< int >( "kernel-step", "Step of kernel increase which is added to kernel (must be even)", 2 ); Loading
src/Benchmarks/Convolution/templates/main_benchmark.h +25 −0 Original line number Diff line number Diff line #include "../kernels/naive.h" #include "../support/DummyBenchmark.h" #include <TNL/Config/parseCommandLine.h> #define DIMENSION DIMENSION_VALUE using TaskBenchmark = DummyBenchmark< DIMENSION, TNL::Devices::Cuda >; int main(int argc, char* argv[]) { TaskBenchmark benchmark; auto config = benchmark.makeInputConfig(); TNL::Config::ParameterContainer parameters; if( ! parseCommandLine( argc, argv, config, parameters ) ) return EXIT_FAILURE; benchmark.run( parameters ); return 0; }