Loading src/matrix/tnlAdaptiveRgCSRMatrix.h +6 −6 Original line number Diff line number Diff line Loading @@ -482,7 +482,7 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo Index desGridSize; desGridSize = this->numberOfGroups; desGridSize = (desGridSize < 16384) ? desGridSize : 16384; desGridSize = (desGridSize < 4096) ? desGridSize : 4096; cudaThreadSynchronize(); int gridSize = (int) desGridSize; Loading @@ -490,11 +490,11 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), vec. getVector(), nonzeroElements. getVector(), nonzero_elements. getVector(), columns. getVector(), blockInfo. getVector(), threads. getVector(), numberOfGroups ); block_info. getVector(), threads_per_row. getVector(), number_of_groups ); cudaThreadSynchronize(); CHECK_CUDA_ERROR; #else Loading
src/matrix/tnlCSRMatrix.h +10 −0 Original line number Diff line number Diff line Loading @@ -66,6 +66,8 @@ class tnlCSRMatrix : public tnlMatrix< Real, Device, Index > Index getNonzeroElements() const; Index getNonzeroElementsInRow( const Index& row ) const; //! This method explicitly computes the number of the non-zero elements. Index checkNonzeroElements() const; Loading Loading @@ -248,6 +250,14 @@ Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElements() const return nonzero_elements. getSize(); } template< typename Real, tnlDevice Device, typename Index > Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElementsInRow( const Index& row ) const { tnlAssert( row >= 0 && row < this -> getSize(), cerr << "row = " << row << " this -> getSize() = " << this -> getSize() ); return row_offsets[ row + 1 ] - row_offsets[ row ]; } template< typename Real, tnlDevice Device, typename Index > Index tnlCSRMatrix< Real, Device, Index > :: checkNonzeroElements() const { Loading
src/matrix/tnlMatrix.h +51 −2 Original line number Diff line number Diff line Loading @@ -61,11 +61,15 @@ class tnlMatrix : public tnlObject //! Allocates the arrays for the non-zero elements virtual bool setNonzeroElements( int n ) = 0; virtual Index getNonzeroElementsInRow( const Index& row ) const = 0; //! Returns the number of the nonzero elements. virtual Index getNonzeroElements() const = 0; virtual Index getArtificialZeroElements() const; bool setRowsReordering( const tnlLongVector< Index, Device, Index >& reorderingPermutation ); virtual Real getElement( Index row, Index column ) const = 0; //! Setting given element Loading Loading @@ -105,6 +109,12 @@ class tnlMatrix : public tnlObject bool load( const tnlString& fileName ); /*! * Computes permutation of the rows such that the rows would be * ordered decreasingly by the number of the non-zero elements. */ bool reorderDecreasingly( tnlLongVector< Index, tnlHost, Index >& permutation ); virtual bool read( istream& str, int verbose = 0 ); Loading @@ -123,6 +133,8 @@ class tnlMatrix : public tnlObject bool& symmetric ); Index size; tnlLongVector< Index, Device, Index > rowsReorderingPermutation; }; template< typename Real, tnlDevice Device, typename Index > Loading @@ -130,7 +142,8 @@ ostream& operator << ( ostream& o_str, const tnlMatrix< Real, Device, Index >& A template< typename Real, tnlDevice Device, typename Index > tnlMatrix< Real, Device, Index > :: tnlMatrix( const tnlString& name ) : tnlObject( name ) : tnlObject( name ), rowsReorderingPermutation( "tnlMatrix::rowsReorderingPermutation" ) { }; Loading @@ -140,6 +153,15 @@ Index tnlMatrix< Real, Device, Index > :: getArtificialZeroElements() const return 0; };

/*!
 * Stores a copy of the given rows permutation in rowsReorderingPermutation.
 *
 * The vector type uses the matrix's own Device template argument; the
 * second template argument of tnlLongVector is a tnlDevice value, so the
 * enum type name itself cannot be passed there.
 *
 * @param reorderingPermutation permutation to copy; the member vector is
 *        resized to the same size before the assignment.
 * @return false when setSize() on the member vector reports failure,
 *         true otherwise.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: setRowsReordering( const tnlLongVector< Index, Device, Index >& reorderingPermutation )
{
   if( ! rowsReorderingPermutation. setSize( reorderingPermutation. getSize() ) )
      return false;
   rowsReorderingPermutation = reorderingPermutation;
   return true;
}

template< typename Real, tnlDevice Device, typename Index > bool tnlMatrix< Real, Device, Index > :: performSORIteration( const Real& omega, const tnlLongVector< Real, Device, Index >& b, Loading Loading @@ -334,6 +356,33 @@ bool tnlMatrix< Real, Device, Index > :: read( istream& file, return true; }

/*!
 * Computes a permutation of the rows such that the rows are ordered
 * decreasingly by the number of the non-zero elements.
 *
 * A counting (bucket) sort keyed by getNonzeroElementsInRow() is used.
 * Rows having the same number of non-zero elements keep their increasing
 * row-index order.
 *
 * @param permutation output vector; it is resized to getSize() and on
 *        success permutation[ k ] holds the index of the row that comes
 *        k-th in the decreasing ordering. It must be writable, therefore
 *        it is taken by non-const reference.
 * @return false when one of the setSize() calls reports failure,
 *         true otherwise.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: reorderDecreasingly( tnlLongVector< Index, tnlHost, Index >& permutation )
{
   const Index rows = this -> getSize();
   if( ! permutation. setSize( rows ) )
      return false;

   /*
    * The buckets are indexed by the number of the non-zero elements in
    * a row, so there must be one bucket for each value 0 .. maxNonzeros.
    */
   Index maxNonzeros( 0 );
   for( Index i = 0; i < rows; i ++ )
   {
      const Index rowNonzeros = this -> getNonzeroElementsInRow( i );
      if( rowNonzeros > maxNonzeros )
         maxNonzeros = rowNonzeros;
   }

   tnlLongVector< Index, tnlHost, Index > buckets( "tnlMatrix::reorderDecreasingly:buckets" );
   if( ! buckets. setSize( maxNonzeros + 1 ) )
      return false;
   buckets. setValue( 0 );

   /*
    * First count how many rows fall into each bucket.
    */
   for( Index i = 0; i < rows; i ++ )
      buckets[ this -> getNonzeroElementsInRow( i ) ] ++;

   /*
    * Turn the counts into starting positions. The buckets are walked from
    * the largest number of non-zero elements down so that the fullest
    * rows come first. The loop counts k down to 1 and uses k - 1 as the
    * bucket index to remain correct even for an unsigned Index.
    */
   Index position( 0 );
   for( Index k = maxNonzeros + 1; k > 0; k -- )
   {
      const Index count = buckets[ k - 1 ];
      buckets[ k - 1 ] = position;
      position += count;
   }

   /*
    * Finally place each row, including the row 0, at the next free
    * position of its bucket.
    */
   for( Index i = 0; i < rows; i ++ )
      permutation[ buckets[ this -> getNonzeroElementsInRow( i ) ] ++ ] = i;
   return true;
}

template< typename Real, tnlDevice Device, typename Index > bool tnlMatrix< Real, Device, Index > :: draw( ostream& str, const tnlString& format, Loading
src/matrix/tnlRgCSRMatrix.h +10 −1 Original line number Diff line number Diff line Loading @@ -283,7 +283,16 @@ bool tnlRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatrix< Real return false; /**** * First compute the number of non-zero elements in each row * First prepare permutation of rows to allow some matrix reorderings. */ tnlLongVector< Index, tnlHost, Index > permutation( "tnlRgCSRMatrix::copyFrom:permutation" ); if( ! permutation. setSize( this -> getSize() ) ) return false; /**** * Now compute the number of non-zero elements in each row * and compute number of elements which are necessary allocate. */ Index total_elements( 0 ); Loading
tests/tnl-benchmarks.h +1 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ void reductionBenchmark( const int size, device_vector = host_vector; T sum, min, max; const long int reducing_cycles( 10 ); const long int reducing_cycles( 1 ); tnlTimerCPU timer; timer. Reset(); Loading Loading
src/matrix/tnlAdaptiveRgCSRMatrix.h +6 −6 Original line number Diff line number Diff line Loading @@ -482,7 +482,7 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo Index desGridSize; desGridSize = this->numberOfGroups; desGridSize = (desGridSize < 16384) ? desGridSize : 16384; desGridSize = (desGridSize < 4096) ? desGridSize : 4096; cudaThreadSynchronize(); int gridSize = (int) desGridSize; Loading @@ -490,11 +490,11 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), vec. getVector(), nonzeroElements. getVector(), nonzero_elements. getVector(), columns. getVector(), blockInfo. getVector(), threads. getVector(), numberOfGroups ); block_info. getVector(), threads_per_row. getVector(), number_of_groups ); cudaThreadSynchronize(); CHECK_CUDA_ERROR; #else Loading
src/matrix/tnlCSRMatrix.h +10 −0 Original line number Diff line number Diff line Loading @@ -66,6 +66,8 @@ class tnlCSRMatrix : public tnlMatrix< Real, Device, Index > Index getNonzeroElements() const; Index getNonzeroElementsInRow( const Index& row ) const; //! This method explicitly computes the number of the non-zero elements. Index checkNonzeroElements() const; Loading Loading @@ -248,6 +250,14 @@ Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElements() const return nonzero_elements. getSize(); } template< typename Real, tnlDevice Device, typename Index > Index tnlCSRMatrix< Real, Device, Index > :: getNonzeroElementsInRow( const Index& row ) const { tnlAssert( row >= 0 && row < this -> getSize(), cerr << "row = " << row << " this -> getSize() = " << this -> getSize() ); return row_offsets[ row + 1 ] - row_offsets[ row ]; } template< typename Real, tnlDevice Device, typename Index > Index tnlCSRMatrix< Real, Device, Index > :: checkNonzeroElements() const { Loading
src/matrix/tnlMatrix.h +51 −2 Original line number Diff line number Diff line Loading @@ -61,11 +61,15 @@ class tnlMatrix : public tnlObject //! Allocates the arrays for the non-zero elements virtual bool setNonzeroElements( int n ) = 0; virtual Index getNonzeroElementsInRow( const Index& row ) const = 0; //! Returns the number of the nonzero elements. virtual Index getNonzeroElements() const = 0; virtual Index getArtificialZeroElements() const; bool setRowsReordering( const tnlLongVector< Index, Device, Index >& reorderingPermutation ); virtual Real getElement( Index row, Index column ) const = 0; //! Setting given element Loading Loading @@ -105,6 +109,12 @@ class tnlMatrix : public tnlObject bool load( const tnlString& fileName ); /*! * Computes permutation of the rows such that the rows would be * ordered decreasingly by the number of the non-zero elements. */ bool reorderDecreasingly( tnlLongVector< Index, tnlHost, Index >& permutation ); virtual bool read( istream& str, int verbose = 0 ); Loading @@ -123,6 +133,8 @@ class tnlMatrix : public tnlObject bool& symmetric ); Index size; tnlLongVector< Index, Device, Index > rowsReorderingPermutation; }; template< typename Real, tnlDevice Device, typename Index > Loading @@ -130,7 +142,8 @@ ostream& operator << ( ostream& o_str, const tnlMatrix< Real, Device, Index >& A template< typename Real, tnlDevice Device, typename Index > tnlMatrix< Real, Device, Index > :: tnlMatrix( const tnlString& name ) : tnlObject( name ) : tnlObject( name ), rowsReorderingPermutation( "tnlMatrix::rowsReorderingPermutation" ) { }; Loading @@ -140,6 +153,15 @@ Index tnlMatrix< Real, Device, Index > :: getArtificialZeroElements() const return 0; };

/*!
 * Stores a copy of the given rows permutation in rowsReorderingPermutation.
 *
 * The vector type uses the matrix's own Device template argument; the
 * second template argument of tnlLongVector is a tnlDevice value, so the
 * enum type name itself cannot be passed there.
 *
 * @param reorderingPermutation permutation to copy; the member vector is
 *        resized to the same size before the assignment.
 * @return false when setSize() on the member vector reports failure,
 *         true otherwise.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: setRowsReordering( const tnlLongVector< Index, Device, Index >& reorderingPermutation )
{
   if( ! rowsReorderingPermutation. setSize( reorderingPermutation. getSize() ) )
      return false;
   rowsReorderingPermutation = reorderingPermutation;
   return true;
}

template< typename Real, tnlDevice Device, typename Index > bool tnlMatrix< Real, Device, Index > :: performSORIteration( const Real& omega, const tnlLongVector< Real, Device, Index >& b, Loading Loading @@ -334,6 +356,33 @@ bool tnlMatrix< Real, Device, Index > :: read( istream& file, return true; }

/*!
 * Computes a permutation of the rows such that the rows are ordered
 * decreasingly by the number of the non-zero elements.
 *
 * A counting (bucket) sort keyed by getNonzeroElementsInRow() is used.
 * Rows having the same number of non-zero elements keep their increasing
 * row-index order.
 *
 * @param permutation output vector; it is resized to getSize() and on
 *        success permutation[ k ] holds the index of the row that comes
 *        k-th in the decreasing ordering. It must be writable, therefore
 *        it is taken by non-const reference.
 * @return false when one of the setSize() calls reports failure,
 *         true otherwise.
 */
template< typename Real, tnlDevice Device, typename Index >
bool tnlMatrix< Real, Device, Index > :: reorderDecreasingly( tnlLongVector< Index, tnlHost, Index >& permutation )
{
   const Index rows = this -> getSize();
   if( ! permutation. setSize( rows ) )
      return false;

   /*
    * The buckets are indexed by the number of the non-zero elements in
    * a row, so there must be one bucket for each value 0 .. maxNonzeros.
    */
   Index maxNonzeros( 0 );
   for( Index i = 0; i < rows; i ++ )
   {
      const Index rowNonzeros = this -> getNonzeroElementsInRow( i );
      if( rowNonzeros > maxNonzeros )
         maxNonzeros = rowNonzeros;
   }

   tnlLongVector< Index, tnlHost, Index > buckets( "tnlMatrix::reorderDecreasingly:buckets" );
   if( ! buckets. setSize( maxNonzeros + 1 ) )
      return false;
   buckets. setValue( 0 );

   /*
    * First count how many rows fall into each bucket.
    */
   for( Index i = 0; i < rows; i ++ )
      buckets[ this -> getNonzeroElementsInRow( i ) ] ++;

   /*
    * Turn the counts into starting positions. The buckets are walked from
    * the largest number of non-zero elements down so that the fullest
    * rows come first. The loop counts k down to 1 and uses k - 1 as the
    * bucket index to remain correct even for an unsigned Index.
    */
   Index position( 0 );
   for( Index k = maxNonzeros + 1; k > 0; k -- )
   {
      const Index count = buckets[ k - 1 ];
      buckets[ k - 1 ] = position;
      position += count;
   }

   /*
    * Finally place each row, including the row 0, at the next free
    * position of its bucket.
    */
   for( Index i = 0; i < rows; i ++ )
      permutation[ buckets[ this -> getNonzeroElementsInRow( i ) ] ++ ] = i;
   return true;
}

template< typename Real, tnlDevice Device, typename Index > bool tnlMatrix< Real, Device, Index > :: draw( ostream& str, const tnlString& format, Loading
src/matrix/tnlRgCSRMatrix.h +10 −1 Original line number Diff line number Diff line Loading @@ -283,7 +283,16 @@ bool tnlRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatrix< Real return false; /**** * First compute the number of non-zero elements in each row * First prepare permutation of rows to allow some matrix reorderings. */ tnlLongVector< Index, tnlHost, Index > permutation( "tnlRgCSRMatrix::copyFrom:permutation" ); if( ! permutation. setSize( this -> getSize() ) ) return false; /**** * Now compute the number of non-zero elements in each row * and compute number of elements which are necessary allocate. */ Index total_elements( 0 ); Loading
tests/tnl-benchmarks.h +1 −1 Original line number Diff line number Diff line Loading @@ -135,7 +135,7 @@ void reductionBenchmark( const int size, device_vector = host_vector; T sum, min, max; const long int reducing_cycles( 10 ); const long int reducing_cycles( 1 ); tnlTimerCPU timer; timer. Reset(); Loading