Commit c2f4c4ab authored by Tomáš Oberhuber
Browse files

Fixing bug in tnlVector::operator !=.

parent 15dcbd43
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -142,7 +142,7 @@ template< typename Real,
   template< typename Vector >
bool tnlSharedVector< Real, Device, Index > :: operator != ( const Vector& vector ) const
{
   return tnlSharedArray< Real, Device, Index > :: operator == ( vector );
   return tnlSharedArray< Real, Device, Index > :: operator != ( vector );
}

template< typename Real,
+1 −1
Original line number Diff line number Diff line
@@ -127,7 +127,7 @@ template< typename Real,
   template< typename Vector >
bool tnlVector< Real, Device, Index > :: operator != ( const Vector& vector ) const
{
   return tnlArray< Real, Device, Index > :: operator == ( vector );
   return tnlArray< Real, Device, Index > :: operator != ( vector );
}

template< typename Real,
+25 −0
Original line number Diff line number Diff line
@@ -863,6 +863,31 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
                                 OutVector& outVector )
      {
         tnlMatrixVectorProductCuda( matrix, inVector, outVector );
         /*#ifdef HAVE_CUDA    
            typedef tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > Matrix;
            typedef typename Matrix::IndexType IndexType;
            Matrix* kernel_this = tnlCuda::passToDevice( matrix );
            InVector* kernel_inVector = tnlCuda::passToDevice( inVector );
            OutVector* kernel_outVector = tnlCuda::passToDevice( outVector );
            dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
            const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x );
            const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
            for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
            {
               if( gridIdx == cudaGrids - 1 )
                  cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize();
               tnlMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>>
                                              ( kernel_this,
                                                kernel_inVector,
                                                kernel_outVector,
                                                gridIdx );
               checkCudaDevice;
            }
            tnlCuda::freeFromDevice( kernel_this );
            tnlCuda::freeFromDevice( kernel_inVector );
            tnlCuda::freeFromDevice( kernel_outVector );
            checkCudaDevice;
         #endif*/
      }

};
+8 −0
Original line number Diff line number Diff line
@@ -222,6 +222,8 @@ int main( int argc, char* argv[] )
      }      
   }
   datasetSize = loops * elements * sizeof( double ) / oneGB;
   hostVector.setValue( 1.0 );
   deviceVector.setValue( 1.0 );
   cout << "Benchmarking SpMV on CPU: ";
   timer.reset();
   for( int i = 0; i < loops; i++ )
@@ -236,6 +238,12 @@ int main( int argc, char* argv[] )
   for( int i = 0; i < loops; i++ )
      deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
   timer.stop();
   //cout << hostVector2 << endl << deviceVector2 << endl;
      
   if( hostVector2 != deviceVector2 )
   {
      cerr << "Error in SliceEllpack Spmv kernel." << endl;
   }
   bandwidth = 2 * datasetSize / loops / timer.getTime();
   cout << timer.getTime() << " => " << bandwidth << " GB/s" << " speedup " << hostTime / timer.getTime() << endl;