Commit 31ee230c authored by Tomáš Oberhuber
Browse files

Fixing a bug in host and device arrays comparison.

parent 994ee56d
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -256,6 +256,9 @@ bool tnlArrayOperations< tnlHost, tnlCuda >::compareMemory( const Element1* dest
                                                            const Element2* source,
                                                            const Index size )
{
   /***
    * Here, destination is on host and source is on CUDA device.
    */
   tnlAssert( destination, );
   tnlAssert( source, );
   tnlAssert( size >= 0, cerr << "size = " << size );
@@ -280,7 +283,7 @@ bool tnlArrayOperations< tnlHost, tnlCuda >::compareMemory( const Element1* dest
         delete[] host_buffer;
         return false;
      }
      if( ! tnlArrayOperations< tnlHost >::compareMemory( host_buffer, destination, transfer ) )
      if( ! tnlArrayOperations< tnlHost >::compareMemory( &destination[ compared ], host_buffer, transfer ) )
      {
         delete[] host_buffer;
         return false;
+12 −9
Original line number Diff line number Diff line
@@ -687,12 +687,12 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost >
                                 const InVector& inVector,
                                 OutVector& outVector )
      {
//#ifdef HAVE_OPENMP
//#pragma omp parallel for
//#endif           
//         for( Index row = 0; row < matrix.getRows(); row ++ )
//            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
         Index col;
#ifdef HAVE_OPENMP
#pragma omp parallel for
#endif           
         for( Index row = 0; row < matrix.getRows(); row ++ )
            outVector[ row ] = matrix.rowVectorProduct( row, inVector );
         /*Index col;
         for( Index row = 0; row < matrix.getRows(); row ++ )
         {
            outVector[ row ] = 0.0;
@@ -700,8 +700,7 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost >
            for( Index i = row * matrix.rowLengths; i < rowEnd; i++ )
               if( ( col = matrix.columnIndexes[ i ] ) < matrix.columns )
                  outVector[ row ] += matrix.values[ i ] * inVector[ col ];
         }

         }*/
      }
};

@@ -714,6 +713,7 @@ __global__ void tnlEllpackMatrixVectorProductCudaKernel(
   const Index columns,
   const Index compressedRowsLengths,
   const Index alignedRows,
   const Index paddingIndex,
   const Index* columnIndexes,
   const Real* values,
   const Real* inVector,
@@ -727,7 +727,9 @@ __global__ void tnlEllpackMatrixVectorProductCudaKernel(
   Index el( 0 );
   Real result( 0.0 );
   Index columnIndex;
   while( el++ < compressedRowsLengths && ( columnIndex = columnIndexes[ i ] ) < columns )
   while( el++ < compressedRowsLengths && 
          ( columnIndex = columnIndexes[ i ] ) < columns &&
          columnIndex != paddingIndex )
   {
      result += values[ i ] * inVector[ columnIndex ];
      i += alignedRows;
@@ -800,6 +802,7 @@ class tnlEllpackMatrixDeviceDependentCode< tnlCuda >
                  matrix.getColumns(),
                  matrix.rowLengths,
                  matrix.alignedRows,
                  matrix.getPaddingIndex(),
                  matrix.columnIndexes.getData(),
                  matrix.values.getData(),
                  inVector.getData(),
+5 −1
Original line number Diff line number Diff line
@@ -787,6 +787,7 @@ __global__ void tnlSlicedEllpackMatrixVectorProductCudaKernel(
   const Index columns,
   const Index* slicePointers,
   const Index* sliceCompressedRowsLengths,
   const Index paddingIndex,
   const Index* columnIndexes,
   const Real* values,
   const Real* inVector,
@@ -803,7 +804,9 @@ __global__ void tnlSlicedEllpackMatrixVectorProductCudaKernel(
   const Index rowEnd = i + rowLength * SliceSize;
   Real result( 0.0 );
   Index columnIndex;
   while( i < rowEnd && ( columnIndex = columnIndexes[ i ] ) < columns )
   while( i < rowEnd &&
         ( columnIndex = columnIndexes[ i ] ) < columns &&
         columnIndex < paddingIndex )
   {
      result += values[ i ] * inVector[ columnIndex ];
      i += SliceSize;
@@ -919,6 +922,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
                  matrix.getColumns(),
                  matrix.slicePointers.getData(),
                  matrix.sliceCompressedRowsLengths.getData(),
                  matrix.getPaddingIndex(),
                  matrix.columnIndexes.getData(),
                  matrix.values.getData(),
                  inVector.getData(),
+12 −15
Original line number Diff line number Diff line
@@ -36,11 +36,12 @@ __global__ void setCudaTestMatrixKernel( Matrix* matrix,
   const int rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x;
   if( rowIdx >= matrix->getRows() )
      return;
   int col = Max( 0, rowIdx - elementsPerRow / 2 );   
   int col = rowIdx - elementsPerRow / 2;   
   for( int element = 0; element < elementsPerRow; element++ )
   {
      if( col + element < matrix->getColumns() )
         matrix->setElementFast( rowIdx, col + element, 1.0 );
      if( col + element >= 0 &&
          col + element < matrix->getColumns() )
         matrix->setElementFast( rowIdx, col + element, element + 1 );
   }      
}

@@ -152,7 +153,7 @@ int main( int argc, char* argv[] )
   timer.reset();
   timer.start();
   for( int i = 0; i < loops; i++ )
      resultDevice = deviceVector.scalarProduct( deviceVector );
      resultDevice = deviceVector.scalarProduct( deviceVector2 );
   timer.stop();
   bandwidth = 2 * datasetSize / timer.getTime();
   cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl;
@@ -194,6 +195,7 @@ int main( int argc, char* argv[] )
   timer.start();
   for( int i = 0; i < loops; i++ )
      resultDevice = deviceVector.lpNorm( 2.0 );

   timer.stop();
   bandwidth = datasetSize / timer.getTime();
   cout << "Time: " << timer.getTime() << " bandwidth: " << bandwidth << " GB/sec." << endl;
@@ -254,12 +256,12 @@ int main( int argc, char* argv[] )
   {
      if( row % 100 == 0 )
         cout << "Row " << row << "/" << size << "     \r" << flush;
      int col = Max( 0, row - elementsPerRow / 2 );   
      int col = row - elementsPerRow / 2;   
      for( int element = 0; element < elementsPerRow; element++ )
      {
         if( col + element < size )
         if( col + element >= 0 && col + element < size )
         {
            hostMatrix.setElement( row, col + element, 1.0 );
            hostMatrix.setElement( row, col + element, element + 1 );
            //deviceMatrix.setElement( row, col + element, 1.0 );
            elements++;
         }
@@ -280,20 +282,15 @@ int main( int argc, char* argv[] )
   cout << timer.getTime() << " => " << bandwidth << " GB/s" << endl;
   
   cout << "Benchmarking SpMV on GPU: ";
   deviceVector2.setValue( 0.0 );
   timer.reset();
   for( int i = 0; i < loops; i++ )
      deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
   timer.stop();
   //cout << hostVector2 << endl << deviceVector2 << endl;
      
   if( hostVector2 != deviceVector2 )
   {      
      cerr << "Error in SliceEllpack Spmv kernel at positions" << endl;
      //for( int i = 0; i < size; i++ )
      //   if( hostVector2.getElement( i ) != deviceVector2.getElement( i ) )
      //      cerr << " " << i;
      cerr << "Error in Spmv kernel" << endl;
   
   }
   bandwidth = datasetSize / timer.getTime();
   cout << timer.getTime() << " => " << bandwidth << " GB/s" << " speedup " << hostTime / timer.getTime() << endl;
   
+1 −1

File changed.

Contains only whitespace changes.