diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 92a8cc7d30401fbce100d04504e5f0a866d76600..c9a855d9ea9c8bbbace360d7b34471204a2fb3b6 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -93,7 +93,7 @@ benchmarkSpMV( Benchmark & benchmark,
     CSR_HostMatrix CSRhostMatrix;
     CSR_DeviceMatrix CSRdeviceMatrix;
     
-    std::cout << "Reading CSR to set up cuSPARSE..." << std::endl;
+//    std::cout << "Reading CSR to set up cuSPARSE..." << std::endl;
     
     // Read the matrix for CSR, to set up cuSPARSE
     try
@@ -148,7 +148,7 @@ benchmarkSpMV( Benchmark & benchmark,
     HostVector hostVector, hostVector2;
     CudaVector deviceVector, deviceVector2;
     
-    std::cout << "\nReading " << getMatrixFormat( hostMatrix ) << " format..." << std::endl;
+//    std::cout << "\nReading " << getMatrixFormat( hostMatrix ) << " format..." << std::endl;
     
     // Load the format
     try
@@ -176,7 +176,7 @@ benchmarkSpMV( Benchmark & benchmark,
           return false;
       }
     
-    std::cout << "Before cross-device assignment" << std::endl;
+//    std::cout << "Before cross-device assignment" << std::endl;
     
 #ifdef HAVE_CUDA
     // FIXME: This doesn't work for Ad/BiEllpack, because
@@ -186,7 +186,7 @@ benchmarkSpMV( Benchmark & benchmark,
     deviceMatrix = hostMatrix;
 #endif
     // sls.mtx: This doesn't even get printed
-    std::cout << "After cross-device assignment" << std::endl;
+//    std::cout << "After cross-device assignment" << std::endl;
 
     // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
     //  because we need the matrix loaded first to get the rows and columns
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index 26cb289fddf916a9fec3b9ada1f82eacd89d33a6..b71233022b584c90ea8df589b471a9a549458aa0 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -60,14 +60,14 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
    this->rows = rows;
    this->columns = columns;
       
-   std::cout << "INSIDE setDimensions (BEFORE roundToMultiple): this->alignedRows = " << this->alignedRows << std::endl;
-   std::cout << "INSIDE setDimensions (BEFORE roundToMultiple): rows = " << rows << std::endl;
+//   std::cout << "INSIDE setDimensions (BEFORE roundToMultiple): this->alignedRows = " << this->alignedRows << std::endl;
+//   std::cout << "INSIDE setDimensions (BEFORE roundToMultiple): rows = " << rows << std::endl;
    
    // ERROR? RoundToMultiple can in very rare cases return a multiple, that is lower than the number of rows?
    //          e.g. with sls.mtx, the number of rows is 1748122, but when on CUDA, roundToMultiple gives 62752.
    if( std::is_same< Device, Devices::Cuda >::value )
    {
-       std::cout << "columns = " << columns << "\tWarpSize() = " << Devices::Cuda::getWarpSize() << std::endl;
+//       std::cout << "columns = " << columns << "\tWarpSize() = " << Devices::Cuda::getWarpSize() << std::endl;
        this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
 
        // If the number of alignedRows is smaller than the number of rows, we find the 
@@ -77,23 +77,23 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
        {
            IndexType missingRows = this->rows - this->alignedRows;
            
-           std::cout << "  this->rows = " << this->rows << "\tthis->alignedRows = " << this->alignedRows << std::endl;
-           std::cout << "  IF missingRows (pre-round) = " << missingRows << std::endl;
+//           std::cout << "  this->rows = " << this->rows << "\tthis->alignedRows = " << this->alignedRows << std::endl;
+//           std::cout << "  IF missingRows (pre-round) = " << missingRows << std::endl;
            
            missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() );
            
-           std::cout << "  IF missingRows (after-round) = " << missingRows << std::endl;
-           std::cout << "  PRE this->alignedRows = " << this->alignedRows << std::endl;
+//           std::cout << "  IF missingRows (after-round) = " << missingRows << std::endl;
+//           std::cout << "  PRE this->alignedRows = " << this->alignedRows << std::endl;
            
            this->alignedRows +=  missingRows;
            
 //           this->alignedRows += roundToMultiple( this->rows - this->alignedRows, Devices::Cuda::getWarpSize() );
        }
-       std::cout << "AFTER setDimensions: this->alignedRows = " << this->alignedRows << std::endl;
+//       std::cout << "AFTER setDimensions: this->alignedRows = " << this->alignedRows << std::endl;
    }
    else this->alignedRows = rows;
    
-   std::cout << "INSIDE setDimensions: this->alignedRows = " << this->alignedRows << std::endl;
+//   std::cout << "INSIDE setDimensions: this->alignedRows = " << this->alignedRows << std::endl;
    
    if( this->rowLengths != 0 )
       allocateElements();
@@ -110,7 +110,7 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow
 
    this->rowLengths = this->maxRowLength = rowLengths.max();
    
-   std::cout << "  this->rowLengths = " << this->rowLengths << std::endl;
+//   std::cout << "  this->rowLengths = " << this->rowLengths << std::endl;
    
    allocateElements();
 }
@@ -679,7 +679,7 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2
    this->rowLengths = matrix.rowLengths;
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
    
-   std::cout << "DIMENSIONS set; after setDimensions in operator= cross-device" << std::endl;
+//   std::cout << "DIMENSIONS set; after setDimensions in operator= cross-device" << std::endl;
 
    const int blockSize = 32;
    const int blocks = roundUpDivision( this->getRows(), blockSize );
@@ -797,7 +797,7 @@ void Ellpack< Real, Device, Index >::allocateElements()
 {
     // The allocation process isn't limited by RAM with ELL, but rather the size of the values and indexes arrays. Bcs ELL will store rows*maxRowLength elements in one array.
     // The PROBLEM arises when we try to store the entire matrix into one array, which is what ELL essentially does in this case.
-   std::cout << "  this->alignedRows = " << this->alignedRows << "\t this->rowLengths = " << this->rowLengths << std::endl;
+//   std::cout << "  this->alignedRows = " << this->alignedRows << "\t this->rowLengths = " << this->rowLengths << std::endl;
    
    // HOW? Will we have to do this with every format? How to make this global?
    IndexType numMtxElmnts = this->alignedRows * this->rowLengths;
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index 6d6b3eb555150ce516f11781fc12ba41f2ea4e3a..b3fb33856a812a0ff82eb26d134f411c62d5ed2f 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -69,13 +69,13 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
       return false;
 
-   std::cout << "  rowLengths sizeof: " << sizeof( rowLengths ) << std::endl;
-   std::cout << "  rowLengths element sizeof: " << sizeof( rowLengths[0] ) << std::endl;
-   std::cout << "  rowLengths getSize(): " << rowLengths.getSize() << std::endl;
+//   std::cout << "  rowLengths sizeof: " << sizeof( rowLengths ) << std::endl;
+//   std::cout << "  rowLengths element sizeof: " << sizeof( rowLengths[0] ) << std::endl;
+//   std::cout << "  rowLengths getSize(): " << rowLengths.getSize() << std::endl;
    
    matrix.setCompressedRowLengths( rowLengths );
    
-   std::cout << "->CompressedRowLengths SET" << std::endl;
+//   std::cout << "->CompressedRowLengths SET" << std::endl;
 
    if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
       return false;
@@ -347,7 +347,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
    Timer timer;
    timer.start();
    
-   std::cout << "\nBefore while..." << std::endl;
+//   std::cout << "\nBefore while..." << std::endl;
    
    while( std::getline( file, line ) )
    {
@@ -380,7 +380,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
       }
    }
    
-   std::cout << "\nAfter while..." << std::endl;
+//   std::cout << "\nAfter while..." << std::endl;
    
    file.clear();
    long int fileSize = file.tellg();
@@ -390,7 +390,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
               << " -> " << timer.getRealTime()
               << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 ))  << "MB/s." << std::endl;
    
-   std::cout << "->END of reading matrix elements from file" << std::endl;
+//   std::cout << "->END of reading matrix elements from file" << std::endl;
    
    return true;
 }
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index ab32d362daac3a5ffd19b8794f5e8804eb63200d..84d734a937bea3575ecaf737f14d400585fcca92 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -109,7 +109,7 @@ template< typename Real,
           typename Index >
 void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements )
 {
-    std::cout << "  Allocating matrix elements..." << std::endl;
+//    std::cout << "  Allocating matrix elements..." << std::endl;
    // CHECKING: if the number of matrix elements is larger than the highest number the IndexType can go to?
    // INT OVERFLOW
     
@@ -121,7 +121,7 @@ void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& num
    //                tell from within this method, if numberOfMatrixElements is an overflown value or not.
    TNL_ASSERT_GE( numberOfMatrixElements, 0, "Number of matrix elements must be non-negative." );
     
-   std::cout << "  numberOfMatrixElements = " << numberOfMatrixElements << std::endl;
+//   std::cout << "  numberOfMatrixElements = " << numberOfMatrixElements << std::endl;
    
    this->values.setSize( numberOfMatrixElements );
    this->columnIndexes.setSize( numberOfMatrixElements );
@@ -133,7 +133,7 @@ void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& num
    if( numberOfMatrixElements > 0 )
       this->columnIndexes.setValue( this->columns );
    
-   std::cout << "->END OF allocateMatrixElements!!!" << std::endl;
+//   std::cout << "->END OF allocateMatrixElements!!!" << std::endl;
 }
 
 template< typename Real,