diff --git a/src/TNL/Containers/Algorithms/CudaReductionKernel.h b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
index 2686ec83119569e4cc263781b3fa8045460c872a..e506c547a44d75369730954090a5c09939884166 100644
--- a/src/TNL/Containers/Algorithms/CudaReductionKernel.h
+++ b/src/TNL/Containers/Algorithms/CudaReductionKernel.h
@@ -57,9 +57,9 @@ CudaReductionKernel( Operation operation,
    * gridSize is the number of elements processed by all blocks at the
    * same time.
     */
-   IndexType tid = threadIdx. x;
-   IndexType gid = blockIdx. x * blockDim. x + threadIdx. x;
-   IndexType gridSize = blockDim. x * gridDim.x;
+   const IndexType tid = threadIdx.x;
+         IndexType gid = blockIdx.x * blockDim.x + threadIdx.x;
+   const IndexType gridSize = blockDim.x * gridDim.x;
 
    sdata[ tid ] = operation.initialValue();
    /***
@@ -72,13 +72,13 @@ CudaReductionKernel( Operation operation,
       operation.cudaFirstReduction( sdata[ tid ], gid + gridSize,     input1, input2 );
       operation.cudaFirstReduction( sdata[ tid ], gid + 2 * gridSize, input1, input2 );
       operation.cudaFirstReduction( sdata[ tid ], gid + 3 * gridSize, input1, input2 );
-      gid += 4*gridSize;
+      gid += 4 * gridSize;
    }
    while( gid + 2 * gridSize < size )
    {
       operation.cudaFirstReduction( sdata[ tid ], gid,                input1, input2 );
       operation.cudaFirstReduction( sdata[ tid ], gid + gridSize,     input1, input2 );
-      gid += 2*gridSize;
+      gid += 2 * gridSize;
    }
    while( gid < size )
    {
@@ -260,8 +260,8 @@ CudaReductionKernelLauncher( Operation& operation,
          cudaFuncSetCacheConfig(CudaReductionKernel< Operation,   4 >, cudaFuncCachePreferShared);
 
          CudaReductionKernel< Operation,   4 >
-        <<< gridSize, blockSize, shmem >>>( operation, size, input1, input2, output);
-        break;
+         <<< gridSize, blockSize, shmem >>>( operation, size, input1, input2, output);
+         break;
       case   2:
          cudaFuncSetCacheConfig(CudaReductionKernel< Operation,   2 >, cudaFuncCachePreferShared);
 
diff --git a/src/TNL/Containers/Algorithms/Multireduction_impl.h b/src/TNL/Containers/Algorithms/Multireduction_impl.h
index cb7efdfa274fc73ff0b3265621d83e2fab8c2646..1eff176f014355a78afc7f0207d086a0f218e8c4 100644
--- a/src/TNL/Containers/Algorithms/Multireduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Multireduction_impl.h
@@ -103,7 +103,7 @@ reduce( Operation& operation,
                                                                    deviceAux1 );
    #ifdef CUDA_REDUCTION_PROFILING
       timer.stop();
-      cout << "   Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << endl;
+      std::cout << "   Multireduction of " << n << " datasets on GPU to size " << reducedSize << " took " << timer.getRealTime() << " sec. " << std::endl;
       timer.reset();
       timer.start();
    #endif
@@ -117,18 +117,18 @@ reduce( Operation& operation,
 
    #ifdef CUDA_REDUCTION_PROFILING
       timer.stop();
-      cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << endl;
+      std::cout << "   Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl;
       timer.reset();
       timer.start();
    #endif
 
-//   cout << "resultArray = [";
+//   std::cout << "resultArray = [";
 //   for( int i = 0; i < n * reducedSize; i++ ) {
-//      cout << resultArray[ i ];
+//      std::cout << resultArray[ i ];
 //      if( i < n * reducedSize - 1 )
-//         cout << ", ";
+//         std::cout << ", ";
 //   }
-//   cout << "]" << endl;
+//   std::cout << "]" << std::endl;
 
    /***
     * Reduce the data on the host system.
@@ -138,7 +138,7 @@ reduce( Operation& operation,
 
    #ifdef CUDA_REDUCTION_PROFILING
       timer.stop();
-      cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << endl;
+      std::cout << "   Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl;
    #endif
 
    return checkCudaDevice;
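
The Multireduction_impl.h hunks qualify the profiling output with std:: and keep the same measure, print, restart rhythm around each stage. The sketch below restates that CUDA_REDUCTION_PROFILING pattern; the stand-in Timer is a hypothetical minimal replacement added only so the snippet compiles on its own, assuming TNL's own Timer exposes the same start/stop/reset/getRealTime calls used in the diff.

```cpp
#include <chrono>
#include <iostream>

// Tiny stand-in for the timer used in Multireduction_impl.h (illustrative only).
struct Timer
{
   using clock = std::chrono::steady_clock;
   clock::time_point begin = clock::now();
   double elapsed = 0.0;

   void start() { begin = clock::now(); }
   void stop()  { elapsed += std::chrono::duration< double >( clock::now() - begin ).count(); }
   void reset() { elapsed = 0.0; }
   double getRealTime() const { return elapsed; }
};

// Stop, report, and restart the timer around one stage, exactly as the
// profiling blocks in the diff do; the whole block compiles away when
// CUDA_REDUCTION_PROFILING is not defined.
void profileStage( Timer& timer, const char* label )
{
   #ifdef CUDA_REDUCTION_PROFILING
      timer.stop();
      // Print through the qualified std::cout, as the patch enforces.
      std::cout << "   " << label << " took " << timer.getRealTime() << " sec. " << std::endl;
      timer.reset();
      timer.start();
   #endif
}
```

Guarding the whole block keeps both the timing overhead and the std::cout traffic out of non-profiling builds.
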
diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h
index 64c503c106565ce654f8830d4b484f521df65fc6..bbc312da0c574c90cfaa7ab637a5c8efaf035dc6 100644
--- a/src/TNL/Containers/Algorithms/Reduction_impl.h
+++ b/src/TNL/Containers/Algorithms/Reduction_impl.h
@@ -36,9 +36,6 @@ namespace Algorithms {
  */
 const int minGPUReductionDataSize = 256;//65536; //16384;//1024;//256;
 
-#ifdef HAVE_CUDA
-#endif
-
 template< typename Operation >
 bool
 reductionOnCudaDevice( Operation& operation,