Commit 4b5e0f74 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Adding reordering to tnlMatrix.

parent b08ccaf0
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -482,7 +482,7 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo

   Index desGridSize;
	desGridSize = this->numberOfGroups;
	desGridSize = (desGridSize < 16384) ? desGridSize : 16384;
	desGridSize = (desGridSize < 4096) ? desGridSize : 4096;

   cudaThreadSynchronize();
   int gridSize = (int) desGridSize;
@@ -490,11 +490,11 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo

   AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(),
											                                                         vec. getVector(),
                                                                                          nonzeroElements. getVector(),
                                                                                          nonzero_elements. getVector(),
                                                                                          columns. getVector(),
                                                                                          blockInfo. getVector(),
                                                                                          threads. getVector(),
											                                                         numberOfGroups );
                                                                                          block_info. getVector(),
                                                                                          threads_per_row. getVector(),
											                                                         number_of_groups );
    cudaThreadSynchronize();
    CHECK_CUDA_ERROR;
#else
+10 −0
Original line number Diff line number Diff line
@@ -66,6 +66,8 @@ class tnlCSRMatrix : public tnlMatrix< Real, Device, Index >

   Index getNonzeroElements() const;

   //! Returns the number of elements stored in the given row, i.e. the difference of the two consecutive row offsets.
   Index getNonzeroElementsInRow( const Index& row ) const;

   //! This method explicitly computes the number of the non-zero elements.
   Index checkNonzeroElements() const;

@@ -248,6 +250,14 @@ Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElements() const
	return nonzero_elements. getSize();
}

/*!
 * Returns the number of elements stored in the given row.
 * The count is the distance between the offset of the row and the offset
 * of the row following it. The row index is checked by an assertion only.
 */
template< typename Real, tnlDevice Device, typename Index >
Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElementsInRow( const Index& row ) const
{
   tnlAssert( row >= 0 && row < this -> getSize(),
              cerr << "row = " << row << " this -> getSize() = " << this -> getSize() );
   const Index rowBegin = row_offsets[ row ];
   const Index rowEnd = row_offsets[ row + 1 ];
   return rowEnd - rowBegin;
}

template< typename Real, tnlDevice Device, typename Index >
Index tnlCSRMatrix< Real, Device, Index > :: checkNonzeroElements() const
{
+51 −2
Original line number Diff line number Diff line
@@ -61,11 +61,15 @@ class tnlMatrix : public tnlObject
   //! Allocates the arrays for the non-zero elements
   virtual bool setNonzeroElements( int n ) = 0;

   //! Returns the number of the nonzero elements in the given row; implemented by the particular matrix formats.
   virtual Index getNonzeroElementsInRow( const Index& row ) const = 0;

   //! Returns the number of the nonzero elements.
   virtual Index getNonzeroElements() const = 0;

   virtual Index getArtificialZeroElements() const;

   //! Assigns the given permutation to rowsReorderingPermutation; returns false if that vector cannot be resized.
   // NOTE(review): tnlDevice is passed as the second template argument here, while other
   // signatures pass a device value (Device, tnlHost) -- confirm this is intended.
   bool setRowsReordering( const tnlLongVector< Index, tnlDevice, Index >& reorderingPermutation );

   virtual Real getElement( Index row, Index column ) const = 0;

   //! Setting given element
@@ -105,6 +109,12 @@ class tnlMatrix : public tnlObject

   bool load( const tnlString& fileName );

   /*!
    * Computes permutation of the rows such that the rows would be
    * ordered decreasingly by the number of the non-zero elements.
    *
    * \param permutation vector the out-of-class definition resizes and writes into.
    * \return false if the permutation vector cannot be resized.
    *
    * NOTE(review): the out-of-class definition declares the parameter as
    * tnlLongVector< Index, tnlHost, Index >, which does not match the Real
    * element type used here -- confirm which one is intended.
    */
   bool reorderDecreasingly( const tnlLongVector< Real, tnlHost, Index >& permutation );

   virtual bool read( istream& str,
		                int verbose = 0 );

@@ -123,6 +133,8 @@ class tnlMatrix : public tnlObject
		                  bool& symmetric );

   Index size;

   //! Permutation of the rows as set by setRowsReordering().
   // NOTE(review): tnlDevice is used as the second template argument here -- confirm
   // whether the Device template parameter was intended.
   tnlLongVector< Index, tnlDevice, Index > rowsReorderingPermutation;
};

template< typename Real, tnlDevice Device, typename Index >
@@ -130,7 +142,8 @@ ostream& operator << ( ostream& o_str, const tnlMatrix< Real, Device, Index >& A

/*!
 * Constructor: passes the name to tnlObject and gives the
 * rowsReorderingPermutation member its own name.
 */
template< typename Real, tnlDevice Device, typename Index >
tnlMatrix< Real, Device, Index > :: tnlMatrix( const tnlString& name )
: tnlObject( name )
: tnlObject( name ),
  rowsReorderingPermutation( "tnlMatrix::rowsReorderingPermutation" )
{
};

@@ -140,6 +153,15 @@ Index tnlMatrix< Real, Device, Index > :: getArtificialZeroElements() const
   return 0;
};

/*!
 * Stores the given rows reordering permutation in this matrix.
 * The internal vector is first resized to the size of the argument and
 * only then assigned; when the resize fails nothing is assigned and
 * false is returned.
 *
 * NOTE(review): tnlDevice is passed as the second template argument of
 * tnlLongVector here, while other signatures pass a device value
 * (Device, tnlHost) -- confirm this is intended.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: setRowsReordering( const tnlLongVector< Index, tnlDevice, Index >& reorderingPermutation )
{
   const bool resized = rowsReorderingPermutation. setSize( reorderingPermutation. getSize() );
   if( resized )
      rowsReorderingPermutation = reorderingPermutation;
   return resized;
};

template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: performSORIteration( const Real& omega,
                                                              const tnlLongVector< Real, Device, Index >& b,
@@ -334,6 +356,33 @@ bool tnlMatrix< Real, Device, Index > :: read( istream& file,
   return true;
}

/*!
 * Computes a permutation of the rows such that the rows are ordered
 * decreasingly by the number of the non-zero elements.
 *
 * A counting (bucket) sort is used: one bucket per possible number of
 * non-zero elements in a row, i.e. 0 .. getSize(). Rows with the same
 * number of non-zero elements keep their original relative order.
 *
 * \param permutation output vector; it is resized to getSize() and on
 *        success permutation[ i ] holds the index of the row which comes
 *        i-th in the new ordering.
 * \return false if a vector cannot be allocated or if a row reports a
 *         number of non-zero elements outside of 0 .. getSize();
 *         true otherwise.
 *
 * NOTE(review): the parameter is an output and therefore cannot be const;
 * the in-class declaration must use the same parameter type as this
 * definition.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: reorderDecreasingly( tnlLongVector< Index, tnlHost, Index >& permutation )
{
   const Index rows = this -> getSize();
   if( ! permutation. setSize( rows ) )
      return false;

   /*
    * A row may hold anything between 0 and rows non-zero elements,
    * therefore rows + 1 buckets are necessary.
    */
   tnlLongVector< Index, tnlHost, Index > buckets( "tnlMatrix::reorderDecreasingly:buckets" );
   if( ! buckets. setSize( rows + 1 ) )
      return false;
   buckets. setValue( 0 );

   /*
    * First count the rows falling into each bucket.
    */
   for( Index row = 0; row < rows; row ++ )
   {
      const Index nonzeros = this -> getNonzeroElementsInRow( row );
      if( nonzeros < 0 || nonzeros > rows )
         return false;
      buckets[ nonzeros ] ++;
   }

   /*
    * Turn the counts into the first free position of each bucket. The
    * buckets are traversed from the largest number of non-zero elements
    * to get the decreasing ordering. The loop counter is shifted by one
    * so that it works even with an unsigned Index.
    */
   Index firstFreePosition( 0 );
   for( Index bucket = rows + 1; bucket > 0; bucket -- )
   {
      const Index rowsInBucket = buckets[ bucket - 1 ];
      buckets[ bucket - 1 ] = firstFreePosition;
      firstFreePosition += rowsInBucket;
   }

   /*
    * Finally put each row at the next free position of its bucket.
    */
   for( Index row = 0; row < rows; row ++ )
      permutation[ buckets[ this -> getNonzeroElementsInRow( row ) ] ++ ] = row;
   return true;
}

template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: draw( ostream& str,
		                                         const tnlString& format,
+10 −1
Original line number Diff line number Diff line
@@ -283,7 +283,16 @@ bool tnlRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatrix< Real
		return false;

	/****
	 *  First compute the number of non-zero elements in each row
	 * First prepare permutation of rows to allow some matrix reorderings.
	 */
	tnlLongVector< Index, tnlHost, Index > permutation( "tnlRgCSRMatrix::copyFrom:permutation" );
	if( ! permutation. setSize( this -> getSize() ) )
	   return false;



	/****
	 *  Now compute the number of non-zero elements in each row
	 *  and compute the number of elements that need to be allocated.
	 */
	Index total_elements( 0 );
+1 −1
Original line number Diff line number Diff line
@@ -135,7 +135,7 @@ void reductionBenchmark( const int size,
   device_vector = host_vector;

   T sum, min, max;
   const long int reducing_cycles( 10 );
   const long int reducing_cycles( 1 );

   tnlTimerCPU timer;
   timer. Reset();