Commit 008601ad authored by Matouš Fencl's avatar Matouš Fencl Committed by Tomáš Oberhuber
Browse files

Fix 2D GPU neighbours. Version with Chess method and OpenMP FSM methods.

parent f2dc4517
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -365,13 +365,13 @@ __global__ void GetNeighbours( const TNL::Containers::Array< int, Devices::Cuda,
    m = i%numBlockX;
    k = i/numBlockX;
    if( m > 0 && blockCalculationIndicator[ i - 1 ] ){
      pom = 1;//BlockIterPom[ i ] = 1;
      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
    }else if( m < numBlockX -1 && blockCalculationIndicator[ i + 1 ] ){
      pom = 1;//BlockIterPom[ i ] = 1;
    }else if( k > 0 && blockCalculationIndicatorHelp[ i - numBlockX ] ){
      pom = 1;// BlockIterPom[ i ] = 1;
      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
    }else if( k > 0 && blockCalculationIndicator[ i - numBlockX ] ){
      pom = 1;// blockCalculationIndicatorHelp[ i ] = 1;
    }else if( k < numBlockY -1 && blockCalculationIndicator[ i + numBlockX ] ){
      pom = 1;//BlockIterPom[ i ] = 1;
      pom = 1;//blockCalculationIndicatorHelp[ i ] = 1;
    }
    
    if( blockCalculationIndicator[ i ] != 1 )
+3 −3
Original line number Diff line number Diff line
@@ -109,7 +109,7 @@ initInterface( const MeshFunctionPointer& _input,
                output[ cell.getIndex() ] = pom;
              pom = pom - TNL::sign( c )*hx;
              if( TNL::abs( output[ e ] ) > TNL::abs( pom ) )
                output[ e ] = pom; //( hy * c )/( c - input[ n ]) - hy;
                output[ e ] = pom; 
              
              interfaceMap[ cell.getIndex() ] = true;
              interfaceMap[ e ] = true;
@@ -122,7 +122,7 @@ initInterface( const MeshFunctionPointer& _input,
                output[ cell.getIndex() ] = pom;
              pom = pom - TNL::sign( c )*hz;
              if( TNL::abs( output[ t ] ) > TNL::abs( pom ) )
                output[ t ] = pom; //( hy * c )/( c - input[ n ]) - hy;
                output[ t ] = pom; 
              
              interfaceMap[ cell.getIndex() ] = true;
              interfaceMap[ t ] = true;
@@ -736,7 +736,7 @@ updateBlocks( const InterfaceMapType interfaceMap,
        MeshFunctionType& helpFunc,
        ArrayContainer BlockIterHost, int numThreadsPerBlock/*, Real **sArray*/ )
{  
  //#pragma omp parallel for schedule( dynamic )
  #pragma omp parallel for schedule( dynamic )
  for( IndexType i = 0; i < BlockIterHost.getSize(); i++ )
  {
    if( BlockIterHost[ i ] )
+4 −11
Original line number Diff line number Diff line
@@ -363,7 +363,7 @@ solve( const MeshPointer& mesh,
           interfaceMapPtr.template getData< Device >(),
           auxPtr.template getData< Device>(),
           helpFunc.template modifyData< Device>(),
           blockCalculationIndicator,
           blockCalculationIndicator, vecLowerOverlaps, vecUpperOverlaps,
           oddEvenBlock );
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
@@ -381,15 +381,8 @@ solve( const MeshPointer& mesh,
           
           oddEvenBlock= (oddEvenBlock == 0) ? 1: 0;
           
           CudaParallelReduc<<< nBlocks , 1024 >>>( blockCalculationIndicator, dBlock, ( numBlocksX * numBlocksY ) );
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
           CudaParallelReduc<<< 1, nBlocks >>>( dBlock, dBlock, nBlocks );
           cudaDeviceSynchronize();
           TNL_CHECK_CUDA_DEVICE;
           
           BlockIterD = dBlock.getElement( 0 );*/
          
           calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
          */
  /**------------------------------------------------------------------------------------------------*/
          
          
@@ -441,7 +434,7 @@ solve( const MeshPointer& mesh,
          cudaDeviceSynchronize();
          TNL_CHECK_CUDA_DEVICE;
          
          // "Parallel reduction" to see if we should calculate again BlockIterD
          // "Parallel reduction" to see if we should calculate again calculateCudaBlocksAgain
          calculateCudaBlocksAgain = blockCalculationIndicator.containsValue(1);
          
          // When we change something then we should caclucate again in the next passage of MPI ( calculated = true )
+50 −14
Original line number Diff line number Diff line
@@ -103,8 +103,30 @@ solve( const MeshPointer& mesh,
        calculateMPIAgain = 0;
        
/** HERE IS FSM FOR OPENMP (NO MPI) - isnt worthy */
        /*int numThreadsPerBlock = 64;
         
        /*int numThreadsPerBlock = -1;
         
         numThreadsPerBlock = ( mesh->getDimensions().x()/2 + (mesh->getDimensions().x() % 2 != 0 ? 1:0));
         //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
         if( numThreadsPerBlock <= 16 )
         numThreadsPerBlock = 16;
         else if(numThreadsPerBlock <= 32 )
         numThreadsPerBlock = 32;
         else if(numThreadsPerBlock <= 64 )
         numThreadsPerBlock = 64;
         else if(numThreadsPerBlock <= 128 )
         numThreadsPerBlock = 128;
         else if(numThreadsPerBlock <= 256 )
         numThreadsPerBlock = 256;
         else if(numThreadsPerBlock <= 512 )
         numThreadsPerBlock = 512;
         else
         numThreadsPerBlock = 1024;
         //printf("numThreadsPerBlock = %d\n", numThreadsPerBlock);
         
         if( numThreadsPerBlock == -1 ){
            printf("Fail in setting numThreadsPerBlock.\n");
         break;
         }
         
         int numBlocksX = mesh->getDimensions().x() / numThreadsPerBlock + (mesh->getDimensions().x() % numThreadsPerBlock != 0 ? 1:0);
         int numBlocksY = mesh->getDimensions().y() / numThreadsPerBlock + (mesh->getDimensions().y() % numThreadsPerBlock != 0 ? 1:0);
@@ -140,8 +162,22 @@ solve( const MeshPointer& mesh,
         helpFunc1 = auxPtr;
         auxPtr = helpFunc;
         helpFunc = helpFunc1;
         switch ( numThreadsPerBlock ){
         case 16:
         this->template updateBlocks< 18 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         case 32:
         this->template updateBlocks< 34 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         case 64:
         this->template updateBlocks< 66 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         
         case 128:
         this->template updateBlocks< 130 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         case 256:
         this->template updateBlocks< 258 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         case 512:
         this->template updateBlocks< 514 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         default:
         this->template updateBlocks< 1028 >( interfaceMap, *auxPtr, *helpFunc, BlockIterHost, numThreadsPerBlock );
         }
         //Reduction      
         for( int i = 0; i < BlockIterHost.getSize(); i++ ){
         if( IsCalculationDone == 0 ){
@@ -176,43 +212,43 @@ solve( const MeshPointer& mesh,
    // TOP, NORTH and WEST        
        boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
        boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
        
    // TOP, SOUTH and EAST        
        boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
        boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
        
    // TOP, SOUTH and WEST        
        boundsFrom[2] = vecLowerOverlaps[2]; boundsTo[2] = mesh->getDimensions().z() - vecUpperOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; 
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; 
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
            
    // BOTTOM, NOTH and EAST        
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
        boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
        boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy ); 
        
    // BOTTOM, NOTH and WEST        
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
        boundsFrom[1] = vecLowerOverlaps[1]; boundsTo[1] = mesh->getDimensions().y() - vecUpperOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0]; 
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0]; 
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );
        
    // BOTTOM, SOUTH and EAST        
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
        boundsFrom[0] = vecLowerOverlaps[0]; boundsTo[0] = mesh->getDimensions().x() - vecUpperOverlaps[0];
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );    
        
    // BOTTOM, SOUTH and WEST        
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = vecLowerOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = vecLowerOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = vecLowerOverlaps[0];
        boundsFrom[2] = mesh->getDimensions().z() - 1 - vecUpperOverlaps[2]; boundsTo[2] = - 1 + vecLowerOverlaps[2];
        boundsFrom[1] = mesh->getDimensions().y() - 1 - vecUpperOverlaps[1]; boundsTo[1] = - 1 + vecLowerOverlaps[1];
        boundsFrom[0] = mesh->getDimensions().x() - 1 - vecUpperOverlaps[0]; boundsTo[0] = - 1 + vecLowerOverlaps[0];
        goThroughSweep( boundsFrom, boundsTo, aux, interfaceMap, anisotropy );