Skip to content
Snippets Groups Projects
Commit 27a355c6 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Fixed block size in ParallelFor3D and improved heuristics

parent 7930075e
No related branches found
No related tags found
No related merge requests found
......@@ -288,20 +288,36 @@ struct ParallelFor3D< Devices::Cuda >
const Index sizeZ = endZ - startZ;
dim3 blockSize;
if( sizeX >= sizeY * sizeZ ) {
if( sizeX >= sizeY * sizeY * sizeZ * sizeZ ) {
blockSize.x = TNL::min( 256, sizeX );
blockSize.y = 1;
blockSize.z = 1;
}
else if( sizeY >= sizeX * sizeZ ) {
else if( sizeY >= sizeX * sizeX * sizeZ * sizeZ ) {
blockSize.x = 1;
blockSize.y = TNL::min( 256, sizeY );
blockSize.z = 1;
}
else if( sizeZ >= sizeX * sizeY ) {
blockSize.x = 1;
else if( sizeZ >= sizeX * sizeX * sizeY * sizeY ) {
blockSize.x = TNL::min( 2, sizeX );
blockSize.y = TNL::min( 2, sizeY );
// CUDA allows max 64 for blockSize.z
blockSize.z = TNL::min( 64, sizeZ );
}
else if( sizeX >= sizeZ * sizeZ && sizeY >= sizeZ * sizeZ ) {
blockSize.x = TNL::min( 32, sizeX );
blockSize.y = TNL::min( 8, sizeY );
blockSize.z = 1;
}
else if( sizeX >= sizeY * sizeY && sizeZ >= sizeY * sizeY ) {
blockSize.x = TNL::min( 32, sizeX );
blockSize.y = 1;
blockSize.z = TNL::min( 256, sizeZ );
blockSize.z = TNL::min( 8, sizeZ );
}
else if( sizeY >= sizeX * sizeX && sizeZ >= sizeX * sizeX ) {
blockSize.x = 1;
blockSize.y = TNL::min( 32, sizeY );
blockSize.z = TNL::min( 8, sizeZ );
}
else {
blockSize.x = TNL::min( 16, sizeX );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment