Update the heat equation solver (405faef7) · Commits · TNL / tnl-dev

src/Benchmarks/Convolution/CMakeLists.txt

+1 −2

Original line number	Diff line number	Diff line
		@@ -68,5 +68,4 @@ GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "k
		GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "kernels/sharedKernel.h")
		GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "kernels/sharedDataAndKernel.h")

		GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/naive.h")
		GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/sharedDataAndKernel.h")
		GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/heatEquationSharedData.h")

src/Benchmarks/Convolution/kernels/heatEquationSharedData.h

0 → 100644

+182 −0

Original line number	Diff line number	Diff line

		#ifdef HAVE_CUDA

		/**
		* This method stores image tile into shared memory
		* and then calculates convolution.
		*
		* Thanks for the idea https://www.evl.uic.edu/sjames/cs525/final.html
		*/

		#include <TNL/Devices/Cuda.h>
		#include <TNL/Containers/StaticVector.h>
		#include <TNL/Cuda/LaunchHelpers.h>
		#include <TNL/Cuda/SharedMemory.h>

		template< int Dimension, typename Device >
		struct Convolution;

		template< typename Index,
		typename Real,
		typename FetchData,
		typename FetchBoundary,
		typename Convolve,
		typename Store >
		__global__
		static void
		convolution2D( Index kernelWidth,
		Index kernelHeight,
		Index endX,
		Index endY,
		FetchData fetchData,
		FetchBoundary fetchBoundary,
		Convolve convolve,
		Store store )
		{
		Real* data = TNL::Cuda::getSharedMemory< Real >();

		const Index iy = threadIdx.y + blockIdx.y * blockDim.y;
		const Index ix = threadIdx.x + blockIdx.x * blockDim.x;

		const Index radiusY = kernelHeight >> 1;
		const Index radiusX = kernelWidth >> 1;

		const Index dataBlockWidth = 2 * kernelWidth - 1;
		const Index dataBlockHeight = 2 * kernelHeight - 1;

		const Index dataBlockRadiusX = dataBlockWidth >> 1;
		const Index dataBlockRadiusY = dataBlockHeight >> 1;

		Index x, y, index;

		// Top Left
		x = ix - radiusX;
		y = iy - radiusY;
		index = threadIdx.x + threadIdx.y * dataBlockWidth;

		if( x < 0 \|\| y < 0 \|\| x >= endX \|\| y >= endY ) {
		data[ index ] = fetchBoundary( x, y );
		}
		else {
		data[ index ] = fetchData( x, y );
		}

		// Top right
		x = ix + radiusX;
		y = iy - radiusY;
		index = dataBlockRadiusX + threadIdx.x + threadIdx.y * dataBlockWidth;

		if( x < 0 \|\| y < 0 \|\| x >= endX \|\| y >= endY ) {
		data[ index ] = fetchBoundary( x, y );
		}
		else {
		data[ index ] = fetchData( x, y );
		}

		// Bottom Left
		x = ix - radiusX;
		y = iy + radiusY;
		index = threadIdx.x + ( dataBlockRadiusY + threadIdx.y ) * dataBlockWidth;

		if(x < 0 \|\| y < 0 \|\| x >= endX \|\| y >= endY ) {
		data[ index ] = fetchBoundary( x, y );
		}
		else {
		data[ index ] = fetchData( x, y );
		}

		// Bottom Right
		x = ix + radiusX;
		y = iy + radiusY;
		index = dataBlockRadiusX + threadIdx.x + ( dataBlockRadiusY + threadIdx.y ) * dataBlockWidth;

		if( x < 0 \|\| y < 0 \|\| x >= endX \|\| y >= endY ) {
		data[ index ] = fetchBoundary( x, y );
		}
		else {
		data[ index ] = fetchData( x, y );
		}

		__syncthreads();

		if( ix >= endX \|\| iy >= endY )
		return;

		Real result = 0;

		for( Index j = 0; j < kernelHeight; j++ ) {
		Index align = ( j + threadIdx.y ) * dataBlockWidth;

		for( Index i = 0; i < kernelWidth; i++ ) {
		Index index = i + threadIdx.x + align;

		result = convolve( result, ix, iy, i, j, data[ index ]);
		}
		}

		store( ix, iy, result );
		}


		template<>
		struct Convolution< 2, TNL::Devices::Cuda >
		{
		public:
		template< typename Index >
		using Vector = TNL::Containers::StaticVector< 2, Index >;

		template< typename Index, typename Real >
		static void
		setup( TNL::Cuda::LaunchConfiguration& configuration, const Vector< Index >& dimensions, const Vector< Index >& kernelSize )
		{
		Index kernelElementCount = 1;

		for( Index i = 0; i < kernelSize.getSize(); i++ )
		kernelElementCount = ( 2 kernelSize[ i ] ) - 1;

		configuration.dynamicSharedMemorySize = kernelElementCount * sizeof( Real );

		configuration.blockSize.x = kernelSize.x();
		configuration.blockSize.y = kernelSize.y();

		configuration.gridSize.x =
		TNL::min( TNL::Cuda::getMaxGridSize(), TNL::Cuda::getNumberOfBlocks( dimensions.x(), configuration.blockSize.x ) );
		configuration.gridSize.y =
		TNL::min( TNL::Cuda::getMaxGridSize(), TNL::Cuda::getNumberOfBlocks( dimensions.y(), configuration.blockSize.y ) );
		}

		template< typename Index,
		typename Real,
		typename FetchData,
		typename FetchBoundary,
		typename Convolve,
		typename Store >
		static void
		execute( const Vector< Index >& dimensions,
		const Vector< Index >& kernelSize,
		FetchData&& fetchData,
		FetchBoundary&& fetchBoundary,
		Convolve&& convolve,
		Store&& store )
		{
		TNL::Cuda::LaunchConfiguration configuration;

		setup< Index, Real >( configuration, dimensions, kernelSize );

		constexpr auto kernel = convolution2D< Index, Real, FetchData, FetchBoundary, Convolve, Store >;

		TNL::Cuda::launchKernel< true >( kernel,
		0,
		configuration,
		kernelSize.x(),
		kernelSize.y(),
		dimensions.x(),
		dimensions.y(),
		fetchData,
		fetchBoundary,
		convolve,
		store );
		};
		};

		#endif

src/Benchmarks/Convolution/support/HeatEquationSolver.h

+27 −35

Original line number	Diff line number	Diff line
		@@ -2,7 +2,7 @@
		#pragma once

		#include "Solver.h"
		#include "DummyTask.h"
		#include "HeatEquationTask.h"

		#include <TNL/Algorithms/ParallelFor.h>
		#include <TNL/Timer.h>
		@@ -11,8 +11,8 @@ static std::vector< TNL::String > dimensionIds = { "grid-x-size", "grid-y-size"
		static std::vector< TNL::String > kernelSizeIds = { "kernel-x-size", "kernel-y-size" };
		static std::vector< TNL::String > domainIds = { "domain-x-size", "domain-y-size" };
		static std::string sigmaKey = "sigma";
		static std::string timestepKey = "timeStep";
		static std::string finalTimeKey = "finalTime";
		static std::string timeStepKey = "timeStep";
		static std::string timeKey = "time";
		static std::string outputFilenamePrefix = "outputFilenamePrefix";

		template< typename Real = double >
		@@ -64,16 +64,29 @@ public:
		result.setLike( function );
		result = 0;

		auto finalTime = parameters.getParameter< Real >( finalTimeKey );
		auto timeStep = parameters.getParameter< double >( timeStepKey );
		auto finalTime = parameters.getParameter< double >( timeKey );

		convolve( dimensions, domain, spaceSteps, kernelSize, function.getConstView(), result.getView(), finalTime );
		int iterationsCount = finalTime / timeStep;

		auto finalFilename = filenamePrefix + "_final.txt";
		double time = timeStep;

		if( ! writeGNUPlot( finalFilename, dimensions, spaceSteps, domain, result.getConstView() ) ) {
		for (int i = 1; i <= iterationsCount; i++) {
		printf("Time: %lf\n", time);

		convolve( dimensions, domain, kernelSize, function.getConstView(), result.getView(), time );

		auto filename = TNL::String("data_") + TNL::convertToString(i) + ".txt";

		if( ! writeGNUPlot( filename, dimensions, spaceSteps, domain, result.getConstView() ) ) {
		std::cout << "Did fail during file write";
		return;
		}

		result = 0;

		time += timeStep;
		}
		}

		virtual TNL::Config::ConfigDescription
		@@ -82,8 +95,8 @@ public:
		TNL::Config::ConfigDescription config = Base::makeInputConfig();

		config.addDelimiter( "Grid settings:" );
		config.addEntry< int >( dimensionIds[ 0 ], "Grid size along x-axis.", 100 );
		config.addEntry< int >( dimensionIds[ 1 ], "Grid size along y-axis.", 100 );
		config.addEntry< int >( dimensionIds[ 0 ], "Grid size along x-axis.", 200 );
		config.addEntry< int >( dimensionIds[ 1 ], "Grid size along y-axis.", 200 );

		config.addDelimiter( "Kernel settings:" );
		config.addEntry< int >( kernelSizeIds[ 0 ], "Kernel size along x-axis.", 3 );
		@@ -97,7 +110,8 @@ public:

		config.addEntry< Real >( sigmaKey, "Sigma in exponential initial condition.", 0.5);

		config.addEntry< Real >( finalTimeKey, "Final time of the simulation.", 0.12);
		config.addEntry< Real >( timeStepKey, "Time step of the simulation.", 0.005);
		config.addEntry< Real >( timeKey, "Final time of the simulation.", 0.36);

		return config;
		}
		@@ -136,34 +150,12 @@ public:
		void
		convolve( const Vector& dimensions,
		const Point& domain,
		const Point& spaceSteps,
		const Vector& kernelSize,
		typename DataStore::ConstViewType input,
		typename DataStore::ViewType result,
		const Real time ) const
		{
		HostDataStore kernel;

		kernel.resize( kernelSize.x() * kernelSize.y() );

		for( int j = 0; j < kernelSize.y(); j++ ) {
		for( int i = 0; i < kernelSize.x(); i++ ) {
		int index = i + j * kernelSize.x();

		auto x = i * spaceSteps.x() - domain.x() / 2.;
		auto y = j * spaceSteps.y() - domain.y() / 2.;

		kernel[ index ] = ( 1. / ( 4. * M_PI * time ) ) * exp( -( x * x + y * y ) / ( 4. * time ) );
		}
		}

		std::cout << kernel << std::endl;

		DataStore kernelDevice( kernel );

		auto kernelView = kernelDevice.getConstView();

		DummyTask< int, Real, Dimension, Device >::exec( dimensions, kernelSize, input, result, kernelView, 0);
		HeatEquationTask< int, Real, Dimension, Device >::exec( dimensions, kernelSize, domain, { 3., 3. }, time, input, result);
		}

		bool

src/Benchmarks/Convolution/support/HeatEquationTask.h

0 → 100644

+94 −0

Original line number	Diff line number	Diff line

		#pragma once

		template< int Dimension, typename Device >
		struct Convolution
		{
		template< typename Index >
		using Vector = TNL::Containers::StaticVector< Dimension, Index >;

		template< typename Index,
		typename Real,
		typename FetchData,
		typename FetchBoundary,
		typename Convolve,
		typename Store >
		static void
		execute( const Vector< Index >& dimensions,
		const Vector< Index >& kernelSize,
		FetchData&& fetchData,
		FetchBoundary&& fetchBoundary,
		Convolve&& convolve,
		Store&& store );
		};

		template< typename Index, typename Real, int Dimension, typename Device >
		struct HeatEquationTask;

		template< typename Index, typename Real >
		struct HeatEquationTask< Index, Real, 2, TNL::Devices::Cuda >
		{
		public:
		static constexpr int Dimension = 2;
		using Device = TNL::Devices::Cuda;
		using Vector = TNL::Containers::StaticVector< Dimension, Index >;
		using Point = TNL::Containers::StaticVector< Dimension, Real >;
		using ConstDataStore = typename TNL::Containers::Vector< Real, Device, Index >::ConstViewType;
		using DataStore = typename TNL::Containers::Vector< Real, Device, Index >::ViewType;
		using ConvolutionLauncher = Convolution< Dimension, Device >;

		static void
		exec( const Vector& dimensions,
		const Vector& kernelSize,
		const Point& functionDomain,
		const Point& kernelDomain,
		const Real time,
		ConstDataStore& input,
		DataStore& result)
		{
		auto functionSpaceSteps = Point(functionDomain.x() / dimensions.x(), functionDomain.y() / dimensions.y());
		auto kernelSpaceSteps = Point(kernelDomain.x() / kernelSize.x(), kernelDomain.y() / kernelSize.y());

		auto fetchData = [ = ] __cuda_callable__( Index i, Index j )
		{
		auto index = i + j * dimensions.x();

		return input[ index ];
		};

		auto fetchBoundary = [ = ] __cuda_callable__( Index i, Index j )
		{
		return 0;
		};

		auto convolve = [ = ] __cuda_callable__( Real result, Index dataX, Index dataY, Index kernelX, Index kernelY, Real data )
		{
		auto functionXPos = dataX * functionSpaceSteps.x() - (functionDomain.x() / 2),
		functionYPos = dataY * functionSpaceSteps.y() - (functionDomain.y() / 2);

		auto kernelXPos = (kernelX - kernelSize.x() / 2) * kernelSpaceSteps.x(),
		kernelYPos = (kernelY - kernelSize.y() / 2) * kernelSpaceSteps.y();

		auto deltaXPos = kernelXPos - functionXPos,
		deltaYPos = kernelYPos - functionYPos;

		auto kernel = kernelSpaceSteps.x() * kernelSpaceSteps.y() * ( (Real)1 / ( (Real)4 * M_PI * time ) ) * exp( - ( pow(deltaXPos, 2.) + pow(deltaYPos, 2.) ) / ( (Real)4 * time ) );

		return result + data * kernel;
		};

		auto store = [ = ] __cuda_callable__( Index i, Index j, Real resultValue ) mutable
		{
		auto index = i + j * dimensions.x();

		result[ index ] = resultValue;
		};

		ConvolutionLauncher::execute< Index, Real >( dimensions,
		kernelSize,
		std::forward< decltype( fetchData ) >( fetchData ),
		std::forward< decltype( fetchBoundary ) >( fetchBoundary ),
		std::forward< decltype( convolve ) >( convolve ),
		std::forward< decltype( store ) >( store ) );
		}
		};

src/Benchmarks/Convolution/kernels/sharedData.h

+4 −4

File changed.

Contains only whitespace changes.