Commit 092a025d authored by Jakub Klinkovský
Browse files

Optimized ldSize value in CWYGMRES

parent 6c2c196e
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -616,8 +616,12 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m
{
   if( size == _size && restarting == m ) return true;
   size = _size;
   // align each column to 256 bytes
   if( std::is_same< DeviceType, Devices::Cuda >::value )
      // align each column to 256 bytes - optimal for CUDA
      ldSize = roundToMultiple( size, 256 / sizeof( RealType ) );
   else
       // on the host, we add 1 to disrupt the cache false-sharing pattern
      ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1;
   restarting = m;
   if( ! r.setSize( size ) ||
       ! z.setSize( size ) ||