diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 6b6a58f9a06ccdb8b8281bbd6ede523d189cf232..b510636f55d101492552fce1f2830d898f6c26cd 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -17,7 +17,7 @@ namespace TNL {
 namespace Matrices {
 
 template< typename Real,
-          template< typename, typename > class Segments,
+          template< typename Device_, typename Index_ > class Segments,
           typename Device = Devices::Host,
           typename Index = int,
           typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >,
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index 37f59c0580881de1563d8c3bd1ddac281f9ce0b5..1c243bcea886d27b093daef4a1befe948953398a 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -10,7 +10,9 @@
 
 #pragma once
 
+#include <functional>
 #include <TNL/Matrices/SparseMatrix.h>
+#include <TNL/Algorithms/Reduction.h>
 
 namespace TNL {
 namespace Matrices {
@@ -192,6 +194,12 @@ Index
 SparseMatrix< Real, Segments, Device, Index, RealAllocator, IndexAllocator >::
 getNumberOfNonzeroMatrixElements() const
 {
+   const auto columns_view = this->columnIndexes.getConstView();
+   const IndexType paddingIndex = this->getPaddingIndex();
+   auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType {
+      return ( columns_view[ i ] != paddingIndex );
+   };
+   return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 );
 }
 
 template< typename Real,