Commit 405faef7 authored by Yury Hayeu's avatar Yury Hayeu
Browse files

Update the heat equation solver

parent 053bc398
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -68,5 +68,4 @@ GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "k
GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "kernels/sharedKernel.h")
GENERATE_CUDA_EXECUTABLE("ImageConvolution" 2 "templates/main_image_solver.h" "kernels/sharedDataAndKernel.h")

GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/naive.h")
GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/sharedDataAndKernel.h")
GENERATE_CUDA_EXECUTABLE("HeatEquation" 2 "templates/main_heat_equation_solver.h" "kernels/heatEquationSharedData.h")
+182 −0
Original line number Diff line number Diff line

#ifdef HAVE_CUDA

/**
 * This kernel stores an image tile into shared memory
 * and then calculates the convolution.
 *
 * Thanks for the idea https://www.evl.uic.edu/sjames/cs525/final.html
 */

#include <TNL/Devices/Cuda.h>
#include <TNL/Containers/StaticVector.h>
#include <TNL/Cuda/LaunchHelpers.h>
#include <TNL/Cuda/SharedMemory.h>

// Convolution launcher, specialized per dimension and device; the 2D CUDA
// specialization is defined below in this file.
template< int Dimension, typename Device >
struct Convolution;

/**
 * 2D convolution kernel that stages the input tile in shared memory.
 *
 * Launch contract (see Convolution< 2, TNL::Devices::Cuda >::setup):
 *  - blockDim == ( kernelWidth, kernelHeight );
 *  - dynamic shared memory holds ( 2 * kernelWidth - 1 ) * ( 2 * kernelHeight - 1 )
 *    elements of type Real;
 *  - Index must be a signed type — negative coordinates are used below to
 *    detect the domain boundary;
 *  - NOTE(review): the tile indexing appears consistent only for odd kernel
 *    sizes (radius = size >> 1) — confirm even sizes are never requested.
 */
template< typename Index,
          typename Real,
          typename FetchData,
          typename FetchBoundary,
          typename Convolve,
          typename Store >
__global__
static void
convolution2D( Index kernelWidth,
               Index kernelHeight,
               Index endX,
               Index endY,
               FetchData fetchData,
               FetchBoundary fetchBoundary,
               Convolve convolve,
               Store store )
{
   Real* data = TNL::Cuda::getSharedMemory< Real >();

   const Index iy = threadIdx.y + blockIdx.y * blockDim.y;
   const Index ix = threadIdx.x + blockIdx.x * blockDim.x;

   // Radius of the convolution kernel along each axis.
   const Index radiusY = kernelHeight >> 1;
   const Index radiusX = kernelWidth >> 1;

   // The shared tile covers the block plus a kernel radius of halo per side.
   const Index dataBlockWidth = 2 * kernelWidth - 1;
   const Index dataBlockHeight = 2 * kernelHeight - 1;

   const Index dataBlockRadiusX = dataBlockWidth >> 1;
   const Index dataBlockRadiusY = dataBlockHeight >> 1;

   // Loads one element into the shared tile; coordinates outside the domain
   // are delegated to fetchBoundary.
   auto load = [ & ]( Index x, Index y, Index index )
   {
      if( x < 0 || y < 0 || x >= endX || y >= endY ) {
         data[ index ] = fetchBoundary( x, y );
      }
      else {
         data[ index ] = fetchData( x, y );
      }
   };

   // Each thread loads four elements so the whole tile (block + halo) is
   // populated: top-left, top-right, bottom-left and bottom-right quadrants.
   load( ix - radiusX, iy - radiusY, threadIdx.x + threadIdx.y * dataBlockWidth );
   load( ix + radiusX, iy - radiusY, dataBlockRadiusX + threadIdx.x + threadIdx.y * dataBlockWidth );
   load( ix - radiusX, iy + radiusY, threadIdx.x + ( dataBlockRadiusY + threadIdx.y ) * dataBlockWidth );
   load( ix + radiusX, iy + radiusY, dataBlockRadiusX + threadIdx.x + ( dataBlockRadiusY + threadIdx.y ) * dataBlockWidth );

   // The barrier must be reached by every thread in the block, so it stays
   // before the tail guard.
   __syncthreads();

   if( ix >= endX || iy >= endY )
      return;

   Real result = 0;

   // Accumulate over the kernel window: thread (tx, ty) reads tile rows
   // [ty, ty + kernelHeight) and columns [tx, tx + kernelWidth).
   for( Index j = 0; j < kernelHeight; j++ ) {
      const Index align = ( j + threadIdx.y ) * dataBlockWidth;

      for( Index i = 0; i < kernelWidth; i++ ) {
         result = convolve( result, ix, iy, i, j, data[ i + threadIdx.x + align ] );
      }
   }

   store( ix, iy, result );
}


/**
 * CUDA backend of the 2D convolution launcher: builds the launch
 * configuration for the shared-memory convolution2D kernel and runs it.
 */
template<>
struct Convolution< 2, TNL::Devices::Cuda >
{
public:
   template< typename Index >
   using Vector = TNL::Containers::StaticVector< 2, Index >;

   /**
    * Fills \e configuration for convolution2D: one thread per kernel element
    * in a block, a grid covering \e dimensions, and enough dynamic shared
    * memory for the data tile (block plus halo along each axis).
    */
   template< typename Index, typename Real >
   static void
   setup( TNL::Cuda::LaunchConfiguration& configuration, const Vector< Index >& dimensions, const Vector< Index >& kernelSize )
   {
      // The shared tile spans ( 2 * k - 1 ) elements along every axis.
      Index tileElementCount = 1;

      for( Index axis = 0; axis < kernelSize.getSize(); axis++ )
         tileElementCount *= 2 * kernelSize[ axis ] - 1;

      configuration.dynamicSharedMemorySize = tileElementCount * sizeof( Real );

      // Block dimensions mirror the kernel dimensions (required by convolution2D).
      configuration.blockSize.x = kernelSize.x();
      configuration.blockSize.y = kernelSize.y();

      // Grid large enough to cover the domain, clamped to the hardware limit.
      const auto blocksX = TNL::Cuda::getNumberOfBlocks( dimensions.x(), configuration.blockSize.x );
      const auto blocksY = TNL::Cuda::getNumberOfBlocks( dimensions.y(), configuration.blockSize.y );

      configuration.gridSize.x = TNL::min( TNL::Cuda::getMaxGridSize(), blocksX );
      configuration.gridSize.y = TNL::min( TNL::Cuda::getMaxGridSize(), blocksY );
   }

   /**
    * Configures and launches the 2D convolution kernel on device 0.
    * The fetch/convolve/store callables are forwarded to the kernel by value.
    */
   template< typename Index,
             typename Real,
             typename FetchData,
             typename FetchBoundary,
             typename Convolve,
             typename Store >
   static void
   execute( const Vector< Index >& dimensions,
            const Vector< Index >& kernelSize,
            FetchData&& fetchData,
            FetchBoundary&& fetchBoundary,
            Convolve&& convolve,
            Store&& store )
   {
      TNL::Cuda::LaunchConfiguration configuration;
      setup< Index, Real >( configuration, dimensions, kernelSize );

      constexpr auto kernel = convolution2D< Index, Real, FetchData, FetchBoundary, Convolve, Store >;

      // launchKernel< true > — presumably a synchronized launch; see TNL
      // LaunchHelpers for the exact semantics.
      TNL::Cuda::launchKernel< true >( kernel,
                                       0,
                                       configuration,
                                       kernelSize.x(),
                                       kernelSize.y(),
                                       dimensions.x(),
                                       dimensions.y(),
                                       fetchData,
                                       fetchBoundary,
                                       convolve,
                                       store );
   }
};

#endif
+27 −35
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#pragma once

#include "Solver.h"
#include "DummyTask.h"
#include "HeatEquationTask.h"

#include <TNL/Algorithms/ParallelFor.h>
#include <TNL/Timer.h>
@@ -11,8 +11,8 @@ static std::vector< TNL::String > dimensionIds = { "grid-x-size", "grid-y-size"
static std::vector< TNL::String > kernelSizeIds = { "kernel-x-size", "kernel-y-size" };
static std::vector< TNL::String > domainIds = { "domain-x-size", "domain-y-size" };
static std::string sigmaKey = "sigma";
static std::string timestepKey = "timeStep";
static std::string finalTimeKey = "finalTime";
static std::string timeStepKey = "timeStep";
static std::string timeKey = "time";
static std::string outputFilenamePrefix = "outputFilenamePrefix";

template< typename Real = double >
@@ -64,16 +64,29 @@ public:
      result.setLike( function );
      result = 0;

      auto finalTime = parameters.getParameter< Real >( finalTimeKey );
      auto timeStep = parameters.getParameter< double >( timeStepKey );
      auto finalTime = parameters.getParameter< double >( timeKey );

      convolve( dimensions, domain, spaceSteps, kernelSize, function.getConstView(), result.getView(), finalTime );
      int iterationsCount = finalTime / timeStep;

      auto finalFilename = filenamePrefix + "_final.txt";
      double time = timeStep;

      if( ! writeGNUPlot( finalFilename, dimensions, spaceSteps, domain, result.getConstView() ) ) {
      for (int i = 1; i <= iterationsCount; i++) {
         printf("Time: %lf\n", time);

         convolve( dimensions, domain, kernelSize, function.getConstView(), result.getView(), time );

         auto filename = TNL::String("data_") + TNL::convertToString(i) + ".txt";

         if( ! writeGNUPlot( filename, dimensions, spaceSteps, domain, result.getConstView() ) ) {
            std::cout << "Did fail during file write";
            return;
         }

         result = 0;

         time += timeStep;
      }
   }

   virtual TNL::Config::ConfigDescription
@@ -82,8 +95,8 @@ public:
      TNL::Config::ConfigDescription config = Base::makeInputConfig();

      config.addDelimiter( "Grid settings:" );
      config.addEntry< int >( dimensionIds[ 0 ], "Grid size along x-axis.", 100 );
      config.addEntry< int >( dimensionIds[ 1 ], "Grid size along y-axis.", 100 );
      config.addEntry< int >( dimensionIds[ 0 ], "Grid size along x-axis.", 200 );
      config.addEntry< int >( dimensionIds[ 1 ], "Grid size along y-axis.", 200 );

      config.addDelimiter( "Kernel settings:" );
      config.addEntry< int >( kernelSizeIds[ 0 ], "Kernel size along x-axis.", 3 );
@@ -97,7 +110,8 @@ public:

      config.addEntry< Real >( sigmaKey, "Sigma in exponential initial condition.", 0.5);

      config.addEntry< Real >( finalTimeKey, "Final time of the simulation.", 0.12);
      config.addEntry< Real >( timeStepKey, "Time step of the simulation.", 0.005);
      config.addEntry< Real >( timeKey, "Final time of the simulation.", 0.36);

      return config;
   }
@@ -136,34 +150,12 @@ public:
   void
   convolve( const Vector& dimensions,
             const Point& domain,
             const Point& spaceSteps,
             const Vector& kernelSize,
             typename DataStore::ConstViewType input,
             typename DataStore::ViewType result,
             const Real time ) const
   {
      HostDataStore kernel;

      kernel.resize( kernelSize.x() * kernelSize.y() );

      for( int j = 0; j < kernelSize.y(); j++ ) {
         for( int i = 0; i < kernelSize.x(); i++ ) {
            int index = i + j * kernelSize.x();

            auto x = i * spaceSteps.x() - domain.x() / 2.;
            auto y = j * spaceSteps.y() - domain.y() / 2.;

            kernel[ index ] = ( 1. / ( 4. * M_PI * time ) ) * exp( -( x * x + y * y ) / ( 4. * time ) );
         }
      }

      std::cout << kernel << std::endl;

      DataStore kernelDevice( kernel );

      auto kernelView = kernelDevice.getConstView();

      DummyTask< int, Real, Dimension, Device >::exec( dimensions, kernelSize, input, result, kernelView, 0);
      HeatEquationTask< int, Real, Dimension, Device >::exec( dimensions, kernelSize, domain, { 3., 3. }, time, input, result);
   }

   bool
+94 −0
Original line number Diff line number Diff line

#pragma once

/**
 * Generic convolution launcher used by the heat equation task.
 *
 * NOTE(review): execute() is only declared here — the definition is expected
 * to be provided by a per-device specialization included elsewhere (e.g. the
 * CUDA kernel header); confirm the build always includes one.
 */
template< int Dimension, typename Device >
struct Convolution
{
   template< typename Index >
   using Vector = TNL::Containers::StaticVector< Dimension, Index >;

   // Runs the convolution over a \e dimensions-sized grid with a
   // \e kernelSize stencil; data access and accumulation are delegated to
   // the fetchData/fetchBoundary/convolve/store callables.
   template< typename Index,
             typename Real,
             typename FetchData,
             typename FetchBoundary,
             typename Convolve,
             typename Store >
   static void
   execute( const Vector< Index >& dimensions,
            const Vector< Index >& kernelSize,
            FetchData&& fetchData,
            FetchBoundary&& fetchBoundary,
            Convolve&& convolve,
            Store&& store );
};

// Heat equation solver task; specialized per dimension and device below.
template< typename Index, typename Real, int Dimension, typename Device >
struct HeatEquationTask;

/**
 * 2D heat equation step on CUDA: evaluates the solution at the given time by
 * convolving the input function with the heat (Gaussian) kernel sampled on a
 * small stencil.
 */
template< typename Index, typename Real >
struct HeatEquationTask< Index, Real, 2, TNL::Devices::Cuda >
{
public:
   static constexpr int Dimension = 2;
   using Device = TNL::Devices::Cuda;
   using Vector = TNL::Containers::StaticVector< Dimension, Index >;
   using Point = TNL::Containers::StaticVector< Dimension, Real >;
   using ConstDataStore = typename TNL::Containers::Vector< Real, Device, Index >::ConstViewType;
   using DataStore = typename TNL::Containers::Vector< Real, Device, Index >::ViewType;
   using ConvolutionLauncher = Convolution< Dimension, Device >;

   /**
    * Runs the convolution.
    *
    * \param dimensions      grid size (cells along x and y)
    * \param kernelSize      stencil size along x and y
    * \param functionDomain  physical size of the computational domain
    * \param kernelDomain    physical size covered by the stencil
    * \param time            time the heat kernel is evaluated at (must be > 0)
    * \param input           flat row-major view of the input function
    * \param result          flat row-major view the solution is written into
    */
   static void
   exec( const Vector& dimensions,
         const Vector& kernelSize,
         const Point& functionDomain,
         const Point& kernelDomain,
         const Real time,
         ConstDataStore& input,
         DataStore& result)
   {
      const auto functionSpaceSteps = Point( functionDomain.x() / dimensions.x(), functionDomain.y() / dimensions.y() );
      const auto kernelSpaceSteps = Point( kernelDomain.x() / kernelSize.x(), kernelDomain.y() / kernelSize.y() );

      // Row-major lookup of the input function.
      auto fetchData = [ = ] __cuda_callable__( Index i, Index j )
      {
         return input[ i + j * dimensions.x() ];
      };

      // Zero value outside the domain. Return Real explicitly so the lambda's
      // deduced return type matches fetchData (the original returned int 0).
      auto fetchBoundary = [ = ] __cuda_callable__( Index i, Index j )
      {
         return Real( 0 );
      };

      // Accumulates data * G(delta, time), weighted by the stencil cell area
      // (a quadrature weight). `partial` renamed from `result` to stop it
      // shadowing the captured result view.
      auto convolve = [ = ] __cuda_callable__( Real partial, Index dataX, Index dataY, Index kernelX, Index kernelY, Real data )
      {
         // Physical position of the output point inside the centered domain.
         const Real functionXPos = dataX * functionSpaceSteps.x() - ( functionDomain.x() / 2 );
         const Real functionYPos = dataY * functionSpaceSteps.y() - ( functionDomain.y() / 2 );

         // Offset of the stencil sample from the stencil center.
         const Real kernelXPos = ( kernelX - kernelSize.x() / 2 ) * kernelSpaceSteps.x();
         const Real kernelYPos = ( kernelY - kernelSize.y() / 2 ) * kernelSpaceSteps.y();

         // NOTE(review): the Gaussian argument mixes the stencil offset with
         // the absolute output position; for a pure convolution with the heat
         // kernel only the offset should enter — confirm this is intended.
         const Real deltaXPos = kernelXPos - functionXPos;
         const Real deltaYPos = kernelYPos - functionYPos;

         // Squared distance by multiplication — avoids a per-element
         // double-precision pow( x, 2. ) call.
         const Real distanceSquared = deltaXPos * deltaXPos + deltaYPos * deltaYPos;

         const Real weight = kernelSpaceSteps.x() * kernelSpaceSteps.y();
         const Real kernel = weight * ( Real( 1 ) / ( Real( 4 ) * M_PI * time ) ) * exp( -distanceSquared / ( Real( 4 ) * time ) );

         return partial + data * kernel;
      };

      // Row-major write-back; mutable because the captured view is written to.
      auto store = [ = ] __cuda_callable__( Index i, Index j, Real resultValue ) mutable
      {
         result[ i + j * dimensions.x() ] = resultValue;
      };

      ConvolutionLauncher::execute< Index, Real >( dimensions,
                                                   kernelSize,
                                                   std::forward< decltype( fetchData ) >( fetchData ),
                                                   std::forward< decltype( fetchBoundary ) >( fetchBoundary ),
                                                   std::forward< decltype( convolve ) >( convolve ),
                                                   std::forward< decltype( store ) >( store ) );
   }
};
+4 −4

File changed.

Contains only whitespace changes.