Commit 6fc0cab5 authored by Tomáš Oberhuber, committed by Jakub Klinkovský
Browse files

Fixing the necessary condition for the specialization of dense matrix-vector multiplication.

parent 2c566727
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -105,7 +105,8 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
   };
   benchmark.time< Devices::Cuda >( reset, "GPU row", rowMajorMvCuda );

   //std::cerr << "Diff. = " << TNL::max( abs( outCudaVector2 - outCudaVector1 ) ) << std::endl;
   auto diff = TNL::max( abs( outCudaVector2 - outCudaVector1 ) );
   //std::cerr << outCudaVector1 << std::endl << outCudaVector2 << std::endl;

   rowMajorCudaMatrix.reset();
   columnMajorCudaMatrix.setDimensions( size, size );
@@ -123,6 +124,7 @@ benchmarkDenseMVSynthetic( Benchmark<> & benchmark,
   };
   benchmark.time< Devices::Cuda >( reset, "GPU cublas", mvCublas );

   //std::cerr << "Diff. = " << diff << std::endl;
#endif
}

+2 −1
Original line number Diff line number Diff line
@@ -724,7 +724,8 @@ vectorProduct( const InVector& inVector,
   if( end == 0 )
      end = this->getRows();

   if( std::is_same< DeviceType, Devices::Cuda >::value )
   if( std::is_same< DeviceType, Devices::Cuda >::value &&
      matrixMultiplicator == 1.0 && outVectorMultiplicator == 0.0 )
   {
#ifdef HAVE_CUDA
      if( Organization == Algorithms::Segments::ColumnMajorOrder )