From 092a025db1292dbf94213e71f5a6484e19406d1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= <klinkjak@fjfi.cvut.cz>
Date: Mon, 19 Dec 2016 22:34:22 +0100
Subject: [PATCH] Optimized ldSize value in CWYGMRES

---
 src/TNL/Solvers/Linear/CWYGMRES_impl.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/TNL/Solvers/Linear/CWYGMRES_impl.h b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
index ff009863fd..692746a004 100644
--- a/src/TNL/Solvers/Linear/CWYGMRES_impl.h
+++ b/src/TNL/Solvers/Linear/CWYGMRES_impl.h
@@ -616,8 +616,12 @@ bool CWYGMRES< Matrix, Preconditioner > :: setSize( IndexType _size, IndexType m
 {
    if( size == _size && restarting == m ) return true;
    size = _size;
-   // align each column to 256 bytes
-   ldSize = roundToMultiple( size, 256 / sizeof( RealType ) );
+   if( std::is_same< DeviceType, Devices::Cuda >::value )
+      // align each column to 256 bytes - optimal for CUDA
+      ldSize = roundToMultiple( size, 256 / sizeof( RealType ) );
+   else
+       // on the host, we add 1 to disrupt the cache false-sharing pattern
+      ldSize = roundToMultiple( size, 256 / sizeof( RealType ) ) + 1;
    restarting = m;
    if( ! r.setSize( size ) ||
        ! z.setSize( size ) ||
-- 
GitLab