diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt
index 3fc3755814aea28bec83143bdb55d5e0a5879b23..dbde4568f1d1076c3821ae222075d5afa832e83b 100644
--- a/src/UnitTests/Containers/CMakeLists.txt
+++ b/src/UnitTests/Containers/CMakeLists.txt
@@ -66,6 +66,20 @@ ELSE(  BUILD_CUDA )
                               tnl )
 ENDIF( BUILD_CUDA )
 
+# MultireductionTest is compiled from MultireductionTest.cu when BUILD_CUDA is set and
+# from MultireductionTest.cpp otherwise. CXX_TESTS_FLAGS is applied in either case
+# (as OPTIONS of CUDA_ADD_EXECUTABLE, or as PRIVATE compile options of the target).
+IF( BUILD_CUDA )
+   CUDA_ADD_EXECUTABLE( MultireductionTest MultireductionTest.cu
+                        OPTIONS ${CXX_TESTS_FLAGS} )
+ELSE( BUILD_CUDA )
+   ADD_EXECUTABLE( MultireductionTest MultireductionTest.cpp )
+   TARGET_COMPILE_OPTIONS( MultireductionTest PRIVATE ${CXX_TESTS_FLAGS} )
+ENDIF( BUILD_CUDA )
+
+# the linked libraries are the same for both variants, so they are listed once
+TARGET_LINK_LIBRARIES( MultireductionTest ${GTEST_BOTH_LIBRARIES} tnl )
+
 ADD_EXECUTABLE( StaticVectorTest StaticVectorTest.cpp )
 TARGET_COMPILE_OPTIONS( StaticVectorTest PRIVATE ${CXX_TESTS_FLAGS} )
 TARGET_LINK_LIBRARIES( StaticVectorTest
@@ -93,6 +107,7 @@ ADD_TEST( ArrayTest ${EXECUTABLE_OUTPUT_PATH}/ArrayTest${CMAKE_EXECUTABLE_SUFFIX
 ADD_TEST( ArrayViewTest ${EXECUTABLE_OUTPUT_PATH}/ArrayViewTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( StaticArrayTest ${EXECUTABLE_OUTPUT_PATH}/StaticArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( VectorTest ${EXECUTABLE_OUTPUT_PATH}/VectorTest${CMAKE_EXECUTABLE_SUFFIX} )
+ADD_TEST( MultireductionTest ${EXECUTABLE_OUTPUT_PATH}/MultireductionTest${CMAKE_EXECUTABLE_SUFFIX} )  # registers the built executable as a test named MultireductionTest
 ADD_TEST( StaticVectorTest ${EXECUTABLE_OUTPUT_PATH}/StaticVectorTest${CMAKE_EXECUTABLE_SUFFIX} )
 #ADD_TEST( MultiArrayTest ${EXECUTABLE_OUTPUT_PATH}/MultiArrayTest${CMAKE_EXECUTABLE_SUFFIX} )
 
diff --git a/src/UnitTests/Containers/MultireductionTest.cpp b/src/UnitTests/Containers/MultireductionTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c36965c925e19f192cba55a7793f8a69f7410473
--- /dev/null
+++ b/src/UnitTests/Containers/MultireductionTest.cpp
@@ -0,0 +1 @@
+#include "MultireductionTest.h"
diff --git a/src/UnitTests/Containers/MultireductionTest.cu b/src/UnitTests/Containers/MultireductionTest.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c36965c925e19f192cba55a7793f8a69f7410473
--- /dev/null
+++ b/src/UnitTests/Containers/MultireductionTest.cu
@@ -0,0 +1 @@
+#include "MultireductionTest.h"
diff --git a/src/UnitTests/Containers/MultireductionTest.h b/src/UnitTests/Containers/MultireductionTest.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f3c26fc46c440bdc117f85ea4bd0dbc85a96d99
--- /dev/null
+++ b/src/UnitTests/Containers/MultireductionTest.h
@@ -0,0 +1,132 @@
+/***************************************************************************
+                          MultireductionTest.h  -  description
+                             -------------------
+    begin                : Oct 1, 2018
+    copyright            : (C) 2018 by Tomas Oberhuber et al.
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#ifdef HAVE_GTEST
+#include "gtest/gtest.h"
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+#include <TNL/Containers/Algorithms/Multireduction.h>
+
+using namespace TNL;
+using namespace TNL::Containers;
+using namespace TNL::Containers::Algorithms;
+
+// Writes 0, 1, 2, ... into deviceVector: the values are built in a Devices::Host vector shaped by setLike() and then assigned over.
+template< typename View >
+void setLinearSequence( View& deviceVector )
+{
+   Containers::Vector< typename View::RealType, Devices::Host, typename View::IndexType > staging;
+   staging.setLike( deviceVector );
+   for( int pos = 0; pos < staging.getSize(); ++pos )
+      staging[ pos ] = pos;
+   deviceVector = staging;
+}
+
+// Writes 0, -1, -2, ... into deviceVector: the values are built in a Devices::Host vector shaped by setLike() and then assigned over.
+template< typename View >
+void setNegativeLinearSequence( View& deviceVector )
+{
+   Containers::Vector< typename View::RealType, Devices::Host, typename View::IndexType > staging;
+   staging.setLike( deviceVector );
+   for( int pos = 0; pos < staging.getSize(); ++pos )
+      staging[ pos ] = -pos;
+   deviceVector = staging;
+}
+
+// Typed-test fixture: V holds n sequences of length size stored back to back, y is a vector of ones.
+template< typename TestedVector >
+class MultireductionTest : public ::testing::Test
+{
+protected:
+   using DeviceVector = TestedVector;
+   using DeviceView = VectorView< typename TestedVector::RealType, typename TestedVector::DeviceType, typename TestedVector::IndexType >;
+   using HostVector = typename DeviceVector::HostType;
+   using HostView = typename DeviceView::HostType;
+
+   // length of each reduced vector: small enough to keep the tests fast, yet larger than
+   // minGPUReductionDataSize and large enough that the reduction needs multiple CUDA blocks
+   static constexpr int size = 5000;
+
+   // how many vectors are reduced together
+   static constexpr int n = 4;
+
+   DeviceVector V;      // n consecutive segments, each of length size
+   DeviceVector y;      // length size, every element set to 1
+   HostVector result;   // n elements, handed to the reduction as its output buffer
+
+   MultireductionTest()
+   {
+      result.setSize( n );
+      y.setSize( size );
+      y.setValue( 1 );
+      V.setSize( n * size );
+      for( int segment = 0; segment < n; ++segment ) {
+         // view over one segment of V: odd segments get 0, -1, -2, ..., even ones 0, 1, 2, ...
+         DeviceView block( &V[ segment * size ], size );
+         if( segment % 2 != 0 )
+            setNegativeLinearSequence( block );
+         else
+            setLinearSequence( block );
+      }
+   }
+};
+
+// element/device combinations for which MultireductionTest is instantiated (the Cuda ones only if HAVE_CUDA is defined)
+using VectorTypes = ::testing::Types<
+   Vector< int,   Devices::Host >,
+   Vector< float, Devices::Host >
+#ifdef HAVE_CUDA
+   ,
+   Vector< int,   Devices::Cuda >,
+   Vector< float, Devices::Cuda >
+#endif  // HAVE_CUDA
+>;
+
+TYPED_TEST_CASE( MultireductionTest, VectorTypes );  // binds the fixture to the type list above
+
+// Runs Multireduction::reduce with ParallelReductionScalarProduct over V and y (all ones) and
+// expects result[ i ] to be +/- 0.5 * size * ( size - 1 ), i.e. the sum of 0 .. size - 1.
+TYPED_TEST( MultireductionTest, scalarProduct )
+{
+   using Real = typename TestFixture::DeviceVector::RealType;
+   using Device = typename TestFixture::DeviceVector::DeviceType;
+
+   ParallelReductionScalarProduct< Real, Real > operation;
+   // NOTE(review): the second this->size is presumably the stride between consecutive
+   // vectors stored in V -- confirm against the declaration in Multireduction.h
+   Multireduction< Device >::reduce( operation, this->n, this->size,
+                                     this->V.getData(), this->size,
+                                     this->y.getData(), this->result.getData() );
+
+   // the fixture fills odd-numbered segments with the negated sequence, hence the negated sum
+   for( int i = 0; i < this->n; ++i ) {
+      if( i % 2 != 0 )
+         EXPECT_EQ( this->result[ i ], - 0.5 * this->size * ( this->size - 1 ) );
+      else
+         EXPECT_EQ( this->result[ i ], 0.5 * this->size * ( this->size - 1 ) );
+   }
+}
+
+#endif // HAVE_GTEST
+
+
+#include "../GtestMissingError.h"
+int main( int argc, char* argv[] )  // entry point of the test executable
+{
+#ifndef HAVE_GTEST
+   throw GtestMissingError();  // compiled without HAVE_GTEST: no tests exist to run
+#else
+   ::testing::InitGoogleTest( &argc, argv );  // hands the command line to Google Test first
+   return RUN_ALL_TESTS();
+#endif // HAVE_GTEST
+}
diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h
index 47d3908921dcdcdda5689df8ba8e4e424b84c2f2..5683ff17e28a9ff72342f817c648e555fbe02945 100644
--- a/src/UnitTests/Containers/VectorTest.h
+++ b/src/UnitTests/Containers/VectorTest.h
@@ -23,8 +23,8 @@
 
 using namespace TNL;
 using namespace TNL::Containers;
-using namespace TNL::Arithmetics; 
 using namespace TNL::Containers::Algorithms;
+using namespace TNL::Arithmetics;
 
 // should be small enough to have fast tests, but larger than minGPUReductionDataSize
 // and large enough to require multiple CUDA blocks for reduction