diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 484ff2358932e5373cd21dccda07cf3dd580ae31..6a9dab96a4913e15a0f2e970a6480f88f5946404 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -59,8 +59,6 @@ std::string getMatrixFormat( const Matrix& matrix )
    return format;
 }
 
-// This function is not used currently (as of 17.03.19),
-// as the log takes care of printing and saving this information into the log file.
 // Print information about the matrix.
 template< typename Matrix >
 void printMatrixInfo( const Matrix& matrix,
@@ -218,14 +216,6 @@ benchmarkSpMV( Benchmark & benchmark,
    // Setup cuSPARSE MetaData, since it has the same header as CSR,
    // and therefore will not get its own headers (rows, cols, speedup etc.) in log.
    //   * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
-
-   // FIXME: How to include benchmark with different name under the same header as the current format being benchmarked???
-   // FIXME: Does it matter that speedup show difference only between current test and first test?
-   //        Speedup shows difference between CPU and GPU-cuSPARSE, because in Benchmarks.h:
-   //          * If there is no baseTime, the resulting test time is set to baseTime.
-   //          * However, if there is a baseTime (from the CPU compared to GPU test),
-   //            baseTime isn't changed. If we change it in Benchmarks.h to compare
-   //            the speedup from the last test, it will mess up BLAS benchmarks etc.
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
          { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
@@ -244,7 +234,6 @@ benchmarkSpMV( Benchmark & benchmark,
       resultcuSPARSEDeviceVector2 = deviceVector2;
 #endif
 
-//#ifdef COMPARE_RESULTS
    // Difference between GPU (curent format) and GPU-cuSPARSE results
    Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
    Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
@@ -274,15 +263,6 @@ benchmarkSpMV( Benchmark & benchmark,
       std::cout << GPUcuSparse_absMax << std::endl;
       std::cout << GPUcuSparse_lpNorm << std::endl;
 
-      // FIXME: This isn't an elegant solution, it makes the log file very long.
-//      benchmark.addErrorMessage( GPUcuSparse_absMax, 1 );
-//      benchmark.addErrorMessage( GPUcuSparse_lpNorm, 1 );
-
-//      benchmark.addErrorMessage( CPUxGPU_absMax, 1 );
-//      benchmark.addErrorMessage( CPUxGPU_lpNorm, 1 );
-
-//#endif
-
    std::cout << std::endl;
    return true;
 }
@@ -295,15 +275,14 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
                         bool verboseMR )
 {
    bool result = true;
-   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
-//   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
 
-   // AdEllpack/BiEllpack doesn't have cross-device assignment ('= operator') implemented yet
-   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
+   // AdEllpack is broken
+//   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 
    return result;
 }
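A note on the consistency checks above: benchmarkSpMV() validates each tested format against cuSPARSE via Vector::differenceAbsMax() and Vector::differenceLpNorm( ..., 1 ). The following self-contained C++ sketch shows what those two metrics compute, using plain std::vector stand-ins rather than TNL vectors (hypothetical helper names, illustration only):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Maximum absolute difference between a tested result and a reference
// result, i.e. the max-norm of a - b.
double differenceAbsMax( const std::vector< double >& a, const std::vector< double >& b )
{
   double maxDiff = 0.0;
   for( std::size_t i = 0; i < a.size(); i++ )
      maxDiff = std::max( maxDiff, std::fabs( a[ i ] - b[ i ] ) );
   return maxDiff;
}

// L1 norm of the difference, i.e. differenceLpNorm with p = 1.
double differenceL1Norm( const std::vector< double >& a, const std::vector< double >& b )
{
   double sum = 0.0;
   for( std::size_t i = 0; i < a.size(); i++ )
      sum += std::fabs( a[ i ] - b[ i ] );
   return sum;
}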
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h
index 1fcbc1494d5cd133dcaad116b2c5efde33824515..34b081914604f949668d98a8ec5d7657c5ed4086 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/AdEllpack.h
@@ -81,13 +81,12 @@ public:
             std::cout << "HEAD==TAIL" << std::endl;
         else
         {
-            // TEST
             for( warpInfo< MatrixType >* i = this->getHead(); i != this->getTail()->next; i = i->next )
             {
-                if( i == this->getHead() );
-//                    std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
-                else if( i == this->getTail() );
-//                    std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
+                if( i == this->getHead() )
+                    std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
+                else if( i == this->getTail() )
+                    std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
                 else
                     std::cout << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
             }
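The printList() fix above addresses a classic pitfall: in the old code, the trailing semicolon in "if( i == this->getHead() );" was the (empty) body of the if, so the head and tail entries were silently skipped rather than printed. A minimal standalone illustration (not TNL code):

#include <iostream>

int main()
{
   int x = 1;

   // Bug: the stray semicolon is the whole body of the if, so the
   // indented statement below runs unconditionally.
   if( x == 0 );
      std::cout << "printed even though x != 0" << std::endl;

   // Fixed: the statement is now the body of the if and never runs here.
   if( x == 0 )
      std::cout << "not printed" << std::endl;
}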
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h
index dd8731fc66e4029acc192b392a73e7fb804e9acc..b01e9041e6a79d12f5ab3755e76340ea9dba135f 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/AdEllpack_impl.h
@@ -147,10 +147,6 @@ warpList< MatrixType >::~warpList()
       delete temp;
    }
    delete this->head;
-
-   // TEST
-//   std::cout << "List destructor." << std::endl;
-//   this->printList();
 }
 
 
@@ -1169,46 +1165,31 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector,
     IndexType i = 0;
     IndexType elementPtr = this->offset[ warpIdx ] + inWarpIdx;
 
-    // Save the value, to save calling access every loop.
     const IndexType warpLoad = this->localLoad[ warpIdx ];
 
-    // The unroll factor is 4, therefore if a warp has less than 4 localLoad, it cannot be unrolled
-    // and must be calculated separately.
     if( warpLoad < 4 )
     {
-        // While the helpful index of the warp localLoad is less than localLoad and the element index isn't
-        // out of the matrix (would return the number of columns of the matrix)
        while( i < warpLoad &&
               this->columnIndexes[ elementPtr ] < this->getColumns() )
        {
            temp[ threadIdx.x ] += inVector[ this->columnIndexes[ elementPtr ] ] * this->values[ elementPtr ];
-           // For the current thread, shift the elements ptr by warpSize (to keep the thread on one row)
            elementPtr += this->warpSize;
-           i++; // Increment the helpful localLoad index.
+           i++;
        }
     }
-    else // If the localLoad of the warp is unrollable.
+    else
     {
-        // Is the warpLoad divisible by 4 (4 - 1 for binary AND).
-        // This will return how far it is from being divisible:
-        // For 0 & 3 = 0; 1 & 3 = 1; 2 & 3 = 2; 3 & 3 = 3; 4 & 3 = 0, etc.
        IndexType alignUnroll = warpLoad & 3;
 
-        // While the result of divisibility by 4 has not reached the point where it is divisble by 4.
        while( alignUnroll != 0 &&
              this->columnIndexes[ elementPtr ] < this->getColumns() )
        {
            temp[ threadIdx.x ] += inVector[ this->columnIndexes[ elementPtr ] ] * this->values[ elementPtr ];
            elementPtr += this->warpSize;
            i++;
-           // If alignUnroll not 0 (i.e. no. of NNZ elements is not divisible by 4), decrement alignUnroll until it is.
-           // This will ensure that the i starting index with be incremented to the correct starting position for the unroll.
            alignUnroll--;
        }
     }
-
-    // For those rows that have warpLoad < unroll factor, this for loop won't even get past the first condition.
-    // Otherwise unroll.
     for( ; i < this->localLoad[ warpIdx ]; i += 4 )
     {
 #pragma unroll
@@ -1222,7 +1203,6 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector,
        }
     }
 
-    // What is going on here? DOCUMENT
     if( ( inWarpIdx == 0 ) || ( reduceMap[ threadIdx.x ] > reduceMap[ threadIdx.x - 1 ] ) )
     {
         IndexType end = ( warpIdx + 1 ) << 5;
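The comments deleted from spmvCuda8() above explained the warpLoad & 3 idiom: n & 3 equals n % 4, so the kernel first peels off that many iterations and the remaining trip count becomes a multiple of the unroll factor 4. A host-side sketch of the same align-then-unroll pattern (illustration only, not the CUDA kernel):

#include <cstddef>

// Sum an array with a 4-way unrolled main loop. The first loop peels
// n % 4 iterations; the second loop then always runs a multiple of 4.
double sumUnrolled4( const double* v, std::size_t n )
{
   double sum = 0.0;
   std::size_t i = 0;

   for( std::size_t peel = n & 3; peel != 0; peel-- )  // n & 3 == n % 4
      sum += v[ i++ ];

   for( ; i < n; i += 4 )  // trip count is now divisible by 4
      sum += v[ i ] + v[ i + 1 ] + v[ i + 2 ] + v[ i + 3 ];

   return sum;
}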
@@ -1265,15 +1245,6 @@ void AdEllpack< Real, Device, Index >::spmvCuda16( const InVector& inVector,
     IndexType elementPtr = this->offset[ warpIdx ] + inWarpIdx;
     const IndexType warpLoad = this->localLoad[ warpIdx ];
 
-//    for( IndexType i = 0; i < warpLoad; i++ )
-//    {
-//        if( this->columnIndexes[ elementPtr ] < this->getColumns() )
-//        {
-//            temp[ threadIdx.x ] += this->values[ elementPtr] * inVector[ this->columnIndexes[ elementPtr ] ];
-//            elementPtr += this->warpSize;
-//        }
-//    }
-
     if( warpLoad < 8 )
     {
         while( i < warpLoad &&
@@ -1496,7 +1467,6 @@ public:
       InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
       OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
       TNL_CHECK_CUDA_DEVICE;
-      std::cout << "totalLoad = " << matrix.totalLoad << std::endl;
       if( matrix.totalLoad < 2 )
       {
         dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
@@ -1520,7 +1490,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 4 ) // WORKS
+      else if( matrix.totalLoad < 4 )
       {
         dim3 blockSize( 192 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1543,7 +1513,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 8 ) // Maybe works?
+      else if( matrix.totalLoad < 8 )
       {
         dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1566,7 +1536,7 @@ public:
         Devices::Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 16 ) // BROKEN
+      else if( matrix.totalLoad < 16 )
       {
         dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1589,7 +1559,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else // BROKEN
+      else
       {
         dim3 blockSize( 96 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1606,7 +1576,7 @@ public:
                                                           kernel_outVector,
                                                           gridIdx );
         }
-        TNL_CHECK_CUDA_DEVICE; // FREEZES right here on CHECK CUDA
+        TNL_CHECK_CUDA_DEVICE;
        Devices::Cuda::freeFromDevice( kernel_this );
        Devices::Cuda::freeFromDevice( kernel_inVector );
        Devices::Cuda::freeFromDevice( kernel_outVector );
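The launcher above selects a kernel variant and CUDA block size from matrix.totalLoad. Condensed to just that decision, with the thresholds and block sizes exactly as they appear in the hunks (a sketch only; the real code also launches the matching spmvCuda kernel and manages the device copies):

// Block-size selection mirroring the totalLoad branches above;
// presumably the heavier per-thread workloads of the more aggressively
// unrolled kernels call for smaller blocks.
unsigned int pickBlockSize( int totalLoad )
{
   if( totalLoad < 2 )  return 256;
   if( totalLoad < 4 )  return 192;
   if( totalLoad < 8 )  return 128;
   if( totalLoad < 16 ) return 128;
   return 96;
}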
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h
index bfb8a7e35aa677a1af434efea16d1982b27ec3ca..e20b5cd2307f0b749f18ebd252ab2e55c41422f9 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/BiEllpack_impl.h
@@ -802,10 +802,9 @@ template< typename Real,
 void BiEllpack< Real, Device, Index >::printValues() const
 {
    for( Index i = 0; i < this->values.getSize(); i++ ) {
-      // Random values are stored with the column index of getColumns(). e.g. a matrix has 4 columns, values are at column indexes 0, 1, 2, 3 and junk data at index 4.
-        if( this->columnIndexes.getElement( i ) != this->getColumns() )
-            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
-                      << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
+      if( this->columnIndexes.getElement( i ) != this->getColumns() )
+         std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
+                   << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
    }
    for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) {
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 8891f5b935ece70662c2a17d2e7ca67c508af6aa..3164a7fff39bd9095f39b606cad43b8d32e7b898 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -124,41 +124,8 @@ template< typename Real,
 Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
    // TODO: Fix/Implement
-   throw Exceptions::NotImplementedError( "CSR::getNonZeroRowLength is not implemented." );
-//    if( std::is_same< DeviceType, Devices::Host >::value )
-//    {
-//       ConstMatrixRow matrixRow = this->getRow( row );
-//       return matrixRow.getNonZeroElementsCount();
-//    }
-//    if( std::is_same< DeviceType, Devices::Cuda >::value )
-//    {
-//       IndexType *cols = new IndexType[4];
-//       RealType *vals = new RealType[4];
-//       for( int i = 0; i < 4; i++ )
-//       {
-//          cols[i] = i;
-//          vals[i] = 1.0;
-//       }
-//       ConstMatrixRow matrixRow(cols, vals, 4, 1);
-// //       ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
-//       // WHEN debugging with GDB:
-//       // (gdb) p this->rowPointers[0]
-//       //    Could not find operator[].
-//       // (gdb) p rowPointers.getElement(0)
-//       //    Attempt to take address of value not located in memory.
-//       IndexType resultHost ( 0 );
-//       IndexType* resultCuda = Cuda::passToDevice( resultHost );
-//       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
-//       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
-//       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
-//       delete []cols;
-//       delete []vals;
-//       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
-//       resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
-//       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
-//       Cuda::freeFromDevice( resultCuda );
-//       return resultHost;
-//    }
+   TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
+   return 0;
 }
 
 template< typename Real,
@@ -223,13 +190,6 @@ bool CSR< Real, Device, Index >::addElementFast( const IndexType row,
                                                  const RealType& value,
                                                  const RealType& thisElementMultiplicator )
 {
-   /*TNL_ASSERT( row >= 0 && row < this->rows &&
-               column >= 0 && column <= this->rows,
-              std::cerr << " row = " << row
-                        << " column = " << column
-                        << " this->rows = " << this->rows
-                        << " this->columns = " << this-> columns );*/
-
    IndexType elementPtr = this->rowPointers[ row ];
    const IndexType rowEnd = this->rowPointers[ row + 1 ];
    IndexType col = 0;
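CSR::getNonZeroRowLength() above remains a TODO (the removed experiment never worked on CUDA). On the host the intended result is straightforward: row r owns the slice of the values array from rowPointers[ r ] up to rowPointers[ r + 1 ], and only the elements with non-zero values are counted. A sketch with plain std::vector stand-ins for TNL's arrays (hypothetical free function, not the TNL API):

#include <vector>

template< typename Real, typename Index >
Index nonZeroRowLength( const std::vector< Real >& values,
                        const std::vector< Index >& rowPointers,
                        Index row )
{
   Index count = 0;
   // Scan the row's slice of the values array, delimited by rowPointers.
   for( Index ptr = rowPointers[ row ]; ptr < rowPointers[ row + 1 ]; ptr++ )
      if( values[ ptr ] != Real( 0 ) )
         count++;
   return count;
}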
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 32cfca2c4e68dea5a1ebc0c99753f1890eeacf9b..3826a8574fc302d86f363f1d7e72d58d89526303 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -275,22 +275,7 @@ template< typename Real,
 Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
    ConstMatrixRow matrixRow = getRow( row );
-   return matrixRow.getNonZeroElementsCount( getType< Device >() );
-
-//    IndexType elementCount ( 0 );
-//    ConstMatrixRow matrixRow = this->getRow( row );
-//
-//    auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable
-//    {
-//       std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl;
-//       if( matrixRow.getElementValue( i ) != 0.0 )
-//          elementCount++;
-//
-//       std::cout << "End of lambda elementCount = " << elementCount << std::endl;
-//    };
-//
-//    ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros );
-//    return elementCount;
+   return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index d92bccc46b4908708b608bff7e36b974dcbf7131..b99dbc88b59d425e93cc7cde2ee3fe9f31064b0a 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -655,8 +655,6 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2
    // setLike does not work here due to different alignment on Cuda and Host
    this->rowLengths = matrix.rowLengths;
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
-
-//   std::cout << "DIMENSIONS set; after setDimensions in operator= cross-device" << std::endl;
 
    const int blockSize = 32;
    const int blocks = roundUpDivision( this->getRows(), blockSize );
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index b3fb33856a812a0ff82eb26d134f411c62d5ed2f..dd6ddc07245150c859946e97b2fde3a66021aaa3 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -68,14 +68,8 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
       return false;
-
-//   std::cout << " rowLengths sizeof: " << sizeof( rowLengths ) << std::endl;
-//   std::cout << " rowLengths element sizeof: " << sizeof( rowLengths[0] ) << std::endl;
-//   std::cout << " rowLengths getSize(): " << rowLengths.getSize() << std::endl;
 
    matrix.setCompressedRowLengths( rowLengths );
-
-//   std::cout << "->CompressedRowLengths SET" << std::endl;
 
    if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
       return false;
@@ -347,8 +341,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
    Timer timer;
    timer.start();
 
-//   std::cout << "\nBefore while..." << std::endl;
-
    while( std::getline( file, line ) )
    {
      if( line[ 0 ] == '%' ) continue;
@@ -380,8 +372,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
      }
    }
 
-//   std::cout << "\nAfter while..." << std::endl;
-
    file.clear();
    long int fileSize = file.tellg();
    timer.stop();
@@ -390,8 +380,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
                 << " -> " << timer.getRealTime() << " sec. i.e. "
                 << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl;
 
-//   std::cout << "->END of reading matrix elements from file" << std::endl;
-
    return true;
 }
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index 84d734a937bea3575ecaf737f14d400585fcca92..d1643db19a48dbf078fe04389e9cb2d061b28a26 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -109,19 +109,7 @@ template< typename Real,
           typename Index >
 void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements )
 {
-//   std::cout << " Allocating matrix elements..." << std::endl;
-   // CHECKING: if the number of matrix elements is larger than the highest number the IndexType can go to?
-   //           INT OVERFLOW
-
-   // CORRECT? ELL stores certain matrices in such a way, which could cause the number of matrix elements
-   //          to be greater than the maximum value IndexType can store, thus causing int overflow when
-   //          creating the arrays "values" and "indexes".
-   // PROBLEM: int can overflow in such a way that it is still positive, thus rendering this assert useless.
-   // HOW FIX? Do we have to create special conditions for every format in its allocation method? We can't
-   //          tell from within this method, if numberOfMatrixElements is an overflown value or not.
    TNL_ASSERT_GE( numberOfMatrixElements, 0, "Number of matrix elements must be non-negative." );
-
-//   std::cout << " numberOfMatrixElements = " << numberOfMatrixElements << std::endl;
 
    this->values.setSize( numberOfMatrixElements );
    this->columnIndexes.setSize( numberOfMatrixElements );
@@ -132,8 +120,6 @@ void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& num
     */
    if( numberOfMatrixElements > 0 )
      this->columnIndexes.setValue( this->columns );
-
-//   std::cout << "->END OF allocateMatrixElements!!!" << std::endl;
 }
 
 template< typename Real,
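The overflow discussion removed from allocateMatrixElements() above raises a real gap: a row count multiplied by a padded row length can wrap around IndexType and still come out positive, which the remaining TNL_ASSERT_GE( ..., 0, ... ) cannot detect. One conventional guard is to form the product in a wider type before narrowing; a sketch (hypothetical helper, not part of TNL, assuming IndexType is narrower than long long):

#include <limits>
#include <stdexcept>

template< typename Index >
Index checkedElementCount( Index rows, Index maxRowLength )
{
   // Multiply in 64 bits so the intermediate cannot wrap for int-sized
   // Index types, then verify the result fits before narrowing back.
   const long long total = static_cast< long long >( rows )
                         * static_cast< long long >( maxRowLength );
   if( total > static_cast< long long >( std::numeric_limits< Index >::max() ) )
      throw std::overflow_error( "number of matrix elements overflows IndexType" );
   return static_cast< Index >( total );
}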
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index e870cd90545656d005066eade0c5f2a07773798b..8d9e9c727a0d88c40b90f0623d8b2ec8808e3f95 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -41,17 +41,14 @@ void host_test_GetType()
    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
 }
 
-// QUESITON: Cant these two functions be combined into one? Because if no CUDA is present and we were to call
-// CUDA into the function in the TEST, to be tested, then we could have a problem.
-
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
 void cuda_test_GetType()
 {
    MatrixCudaFloat mtrxCudaFloat;
    MatrixCudaInt mtrxCudaInt;
 
-   EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); // This is mistakenly labeled in /src/TNL/Devices/Cuda.cpp
-   EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) ); // Should be Devices::Cuda
+   EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
+   EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
 }
 
 template< typename Matrix >
@@ -1397,7 +1394,6 @@ TYPED_TEST( MatrixTest, printTest )
 
 TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 {
-//    test_GetMatrixProduct< Dense_host_int >();
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
@@ -1414,7 +1410,6 @@ TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 #ifdef HAVE_CUDA
 TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda )
 {
-//    test_GetMatrixProduct< Dense_cuda_int >();
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 8beaa5b296985c41107c7a881eea81d832f04a0c..ef5b28d240a65c5e26eb987c42b76688c59a8d87 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -28,12 +28,6 @@ void host_test_GetType()
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
-
-//    MatrixHostFloat mtrxHostFloat;
-//    MatrixHostInt mtrxHostInt;
-//
-//    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::CSR< float, Devices::Host >" ) );
-//    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::CSR< int, Devices::Host >" ) );
 }
 
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
@@ -42,13 +36,7 @@ void cuda_test_GetType()
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
-
-//    MatrixCudaFloat mtrxCudaFloat;
-//    MatrixCudaInt mtrxCudaInt;
-//
-//    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::CSR< float, Devices::Cuda >" ) );
-//    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::CSR< int, Devices::Cuda >" ) );
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
@@ -224,7 +212,6 @@ void test_SetElement()
 
    typename Matrix::CompressedRowLengthsVector rowLengths;
    rowLengths.setSize( rows );
-//    rowLengths.setValue( 8 );
    rowLengths.setElement( 0, 4 );
    rowLengths.setElement( 1, 3 );
    rowLengths.setElement( 2, 8 );
@@ -614,7 +601,6 @@ void test_VectorProduct()
    using IndexType = typename Matrix::IndexType;
    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-// Matrix totalLoad (AdEll) = 1
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -668,7 +654,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
 
-// Matrix totalLoad (AdEll) = 2
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -723,7 +708,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
 
-// Matrix totalLoad (AdEll) = 3
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -777,7 +761,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
 
-// Matrix totalLoad (AdEll) = 4
    /*
    * Sets up the following 8x8 sparse matrix:
    *
@@ -856,7 +839,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
 
-// Matrix totalLoad (AdEll) = 5
    /*
    * Sets up the following 8x8 sparse matrix:
    *
@@ -942,9 +924,6 @@ void test_VectorProduct()
    EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
    EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
    EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
-
-
-   // ONE MORE TEST HERE FOR 16X16
 }
 
 template< typename Matrix >
@@ -1054,19 +1033,6 @@ void test_OperatorEquals()
    *    |  22  23  24  25  26  27  28   1 |  8
    *    \  29  30  31  32  33  34  35  36 /  8
    */
-
-   /* Sorted BiELL:
-    *
-    *
-    *    /  22  23  24  25  26  27  28   1 \
-    *    |  29  30  31  32  33  34  35  36 |
-    *    |  16  17  18  19  20  21   1     |
-    *    |   1   2   3   4   5   1         |
-    *    |  11  12  13  14   1             |
-    *    |   8   9  10   1                 |
-    *    |   6   7   1                     |
-    *    \  15   1                         /
-    */
 
    const IndexType m_rows = 8;
    const IndexType m_cols = 8;
@@ -1453,7 +1419,6 @@ void test_Print()
 
    std::cout.rdbuf(old_buf); //reset
 
-   //printed << printed.str() << std::endl;
    couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n"
             "Row: 1 -> Col:3->4\t\n"
             "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n"
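test_Print() above verifies the printed output by swapping std::cout's stream buffer and restoring it afterwards (std::cout.rdbuf(old_buf); //reset). The technique in isolation:

#include <iostream>
#include <sstream>

int main()
{
   std::stringstream captured;

   // Redirect std::cout into the stringstream; rdbuf() returns the
   // previous buffer so it can be restored later.
   std::streambuf* old_buf = std::cout.rdbuf( captured.rdbuf() );
   std::cout << "hello";                      // goes into 'captured'
   std::cout.rdbuf( old_buf );                // reset

   std::cout << captured.str() << std::endl;  // prints "hello"
}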
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
index 33a2403bc6563dff52f27da860b3dcdd71bb81a9..aac3a41a8fc895aa0c48136ad6542c69c45cb839 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
@@ -78,7 +78,6 @@ TYPED_TEST( AdEllpackMatrixTest, resetTest )
    test_Reset< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - localLoad, offset and rowOffset are seemingly random numbers in the head and tail of WarpList.
 TYPED_TEST( AdEllpackMatrixTest, setElementTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -86,7 +85,6 @@ TYPED_TEST( AdEllpackMatrixTest, setElementTest )
    test_SetElement< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - localLoad, offset and rowOffset are seemingly random numbers in the head and tail of WarpList.
 TYPED_TEST( AdEllpackMatrixTest, addElementTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -94,7 +92,6 @@ TYPED_TEST( AdEllpackMatrixTest, addElementTest )
    test_AddElement< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - Tests take longer than expected. setElement takes 13ms, compared to SlicedEllpack's 2ms.
 TYPED_TEST( AdEllpackMatrixTest, setRowTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -102,7 +99,6 @@ TYPED_TEST( AdEllpackMatrixTest, setRowTest )
    test_SetRow< AdEllpackMatrixType >();
 }
 
-// WORKS FOR MATRICES up to 99x99, The rest have different results.
 TYPED_TEST( AdEllpackMatrixTest, vectorProductTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -110,7 +106,6 @@ TYPED_TEST( AdEllpackMatrixTest, vectorProductTest )
    test_VectorProduct< AdEllpackMatrixType >();
 }
 
-// TODO test
 TYPED_TEST( AdEllpackMatrixTest, operatorEqualsTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
index e8b4412559af2fff19c8c95330f7a0cfb2ff6d80..c55eb101f009e3b39ab3bf90d3b809d0d608d0ec 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
@@ -57,7 +57,6 @@ using BiEllpackMatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( BiEllpackMatrixTest, BiEllpackMatrixTypes);
 
-// WORKING
 TYPED_TEST( BiEllpackMatrixTest, setDimensionsTest )
 {
    using BiEllpackMatrixType = typename TestFixture::BiEllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
index 91bd7fa9790e03e6e2a20918057c2e6c8a10dd48..5ef97a1df004095479864ab9d4ddc89fdbb54a4d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
@@ -24,10 +24,6 @@ protected:
    using ChunkedEllpackMatrixType = Matrix;
 };
 
-// columnIndexes of ChunkedEllpack appear to be broken, when printed, it prints out a bunch of 4s.
-// rowPointers have interesting elements? 0 18 36 42 54 72 96 126 162 204 256 when rows = 10, cols = 11; rowLengths = 3 3 1 2 3 4 5 6 7 8
-// and 0 52 103 154 205 256 when rows = 5, cols = 4; rowLengths = 3 3 3 3 3
-
 // types for which MatrixTest is instantiated
 using ChEllpackMatrixTypes = ::testing::Types