Commit fddcd146 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Fixed grid boundary entities traversing in CUDA in 2D.

parent 66a0dcd8
Loading
Loading
Loading
Loading
+60 −0
Original line number Diff line number Diff line
/***************************************************************************
                          HeatEquationBuildConfigTag.h  -  description
                             -------------------
    begin                : Jul 7, 2014
    copyright            : (C) 2014 by Tomas Oberhuber
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

#pragma once

#include <TNL/Solvers/BuildConfigTags.h>

namespace TNL {
namespace Solvers {
   
class HeatEquationBuildConfig
{
   public:

      static void print() { std::cerr << "HeatEquationBuildConfig" << std::endl; }
};

/****
 * Turn off support for float and long double.
 */
template<> struct ConfigTagReal< HeatEquationBuildConfig, float > { enum { enabled = false }; };
template<> struct ConfigTagReal< HeatEquationBuildConfig, long double > { enum { enabled = false }; };

/****
 * Turn off support for short int and long int indexing.
 */
template<> struct ConfigTagIndex< HeatEquationBuildConfig, short int >{ enum { enabled = false }; };
template<> struct ConfigTagIndex< HeatEquationBuildConfig, long int >{ enum { enabled = false }; };

/****
 * Use of Grid is enabled for allowed dimensions and Real, Device and Index types.
 */
template< int Dimensions, typename Real, typename Device, typename Index >
   struct ConfigTagMesh< HeatEquationBuildConfig, Meshes::Grid< Dimensions, Real, Device, Index > >
      { enum { enabled = ConfigTagDimensions< HeatEquationBuildConfig, Dimensions >::enabled  &&
                         ConfigTagReal< HeatEquationBuildConfig, Real >::enabled &&
                         ConfigTagDevice< HeatEquationBuildConfig, Device >::enabled &&
                         ConfigTagIndex< HeatEquationBuildConfig, Index >::enabled }; };

/****
 * Please, chose your preferred time discretization  here.
 */
template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ExplicitTimeDiscretisationTag >{ enum { enabled = true }; };
template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, SemiImplicitTimeDiscretisationTag >{ enum { enabled = false }; };
template<> struct ConfigTagTimeDiscretisation< HeatEquationBuildConfig, ImplicitTimeDiscretisationTag >{ enum { enabled = false }; };

/****
 * Only the Runge-Kutta-Merson solver is enabled by default.
 */
template<> struct ConfigTagExplicitSolver< HeatEquationBuildConfig, ExplicitEulerSolverTag >{ enum { enabled = false }; };

} // namespace Solvers
} // namespace TNL
+3 −1
Original line number Diff line number Diff line
@@ -21,12 +21,14 @@
#include <TNL/Functions/MeshFunction.h>
#include <TNL/Problems/HeatEquationProblem.h>
#include <TNL/Meshes/Grid.h>
#include "HeatEquationBuildConfigTag.h"

using namespace TNL;
using namespace TNL::Problems;

//typedef tnlDefaultBuildMeshConfig BuildConfig;
typedef Solvers::FastBuildConfig BuildConfig;
//typedef Solvers::FastBuildConfig BuildConfig;
typedef Solvers::HeatEquationBuildConfig BuildConfig;

template< typename MeshConfig >
class heatEquationConfig
+7 −17
Original line number Diff line number Diff line
@@ -23,18 +23,9 @@ void Cuda::setupThreads( const dim3& blockSize,
                         long long int yThreads,
                         long long int zThreads )
{
   if( blockSize.x )
      blocksCount.x = xThreads / blockSize.x + ( xThreads % blockSize.x != 0 );
   else
      blocksCount.x = 0;
   if( blockSize.y )
      blocksCount.y = yThreads / blockSize.y + ( yThreads % blockSize.y != 0 );
   else
      blocksCount.y = 0;
   if( blockSize.z )
      blocksCount.z = xThreads / blockSize.z + ( zThreads % blockSize.z != 0 );
   else
      blocksCount.z = 0;
   blocksCount.x = max( 1, xThreads / blockSize.x + ( xThreads % blockSize.x != 0 ) );
   blocksCount.y = max( 1, yThreads / blockSize.y + ( yThreads % blockSize.y != 0 ) );
   blocksCount.z = max( 1, zThreads / blockSize.z + ( zThreads % blockSize.z != 0 ) );
   
   /****
    * TODO: Fix the following:
@@ -80,21 +71,20 @@ void Cuda::setupGrid( const dim3& blocksCount,
   else
      gridSize.z = blocksCount.z % properties.maxGridSize[ 2 ];*/
   
   if( gridIdx.x < gridsCount.x )
   if( gridIdx.x < gridsCount.x - 1 )
      gridSize.x = getMaxGridSize();
   else
      gridSize.x = blocksCount.x % getMaxGridSize();
   
   if( gridIdx.y < gridsCount.y )
   if( gridIdx.y < gridsCount.y - 1 )
      gridSize.y = getMaxGridSize();
   else
      gridSize.y = blocksCount.y % getMaxGridSize();

   if( gridIdx.z < gridsCount.z )
   if( gridIdx.z < gridsCount.z - 1 )
      gridSize.z = getMaxGridSize();
   else
      gridSize.z = blocksCount.z % getMaxGridSize();
   
}

void Cuda::printThreadsSetup( const dim3& blockSize,
+4 −1
Original line number Diff line number Diff line
@@ -451,6 +451,7 @@ processEntities(
      for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongX.x; gridIdx.x++ )
      {
         Devices::Cuda::setupGrid( cudaBlocksCountAlongX, cudaGridsCountAlongX, gridIdx, cudaGridSize );
         //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongX, cudaGridSize, cudaGridsCountAlongX );
         GridTraverser2DBoundaryAlongX< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
               <<< cudaGridSize, cudaBlockSize, 0, s1 >>>
               ( &gridPointer.template getData< Devices::Cuda >(),
@@ -475,6 +476,7 @@ processEntities(
      for( gridIdx.x = 0; gridIdx.x < cudaGridsCountAlongY.x; gridIdx.x++ )
      {
         Devices::Cuda::setupGrid( cudaBlocksCountAlongY, cudaGridsCountAlongY, gridIdx, cudaGridSize );
         //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCountAlongY, cudaGridSize, cudaGridsCountAlongY );
         GridTraverser2DBoundaryAlongY< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
               <<< cudaGridSize, cudaBlockSize, 0, s3 >>>
               ( &gridPointer.template getData< Devices::Cuda >(),
@@ -494,6 +496,7 @@ processEntities(
                 gridIdx,
                 gridEntityParameters... );
      }
      //getchar();
      cudaStreamSynchronize( s1 );
      cudaStreamSynchronize( s2 );
      cudaStreamSynchronize( s3 );
@@ -522,7 +525,7 @@ processEntities(
         for( gridIdx.x = 0; gridIdx.x < cudaGridsCount.x; gridIdx.x ++ )
         {
            Devices::Cuda::setupGrid( cudaBlocksCount, cudaGridsCount, gridIdx, cudaGridSize );
            Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
            //Devices::Cuda::printThreadsSetup( cudaBlockSize, cudaBlocksCount, cudaGridSize, cudaGridsCount );
            GridTraverser2D< Real, Index, GridEntity, UserData, EntitiesProcessor, processOnlyBoundaryEntities, GridEntityParameters... >
               <<< cudaGridSize, cudaBlockSize, 0, s >>>
               ( &gridPointer.template getData< Devices::Cuda >(),