Loading src/implementation/matrices/tnlCSRMatrix_impl.h +2 −2 Original line number Diff line number Diff line Loading @@ -370,8 +370,8 @@ typename Vector::RealType tnlCSRMatrix< Real, Device, Index >::rowVectorProduct( const IndexType rowEnd = this->rowPointers[ row + 1 ]; while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < this->columns ) { const Index column = this->columnIndexes.getElement( elementPtr ); result += this->values.getElement( elementPtr++ ) * vector.getElement( column ); const Index column = this->columnIndexes[ elementPtr ]; result += this->values[ elementPtr++ ] * vector[ column ]; } return result; } Loading src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +28 −20 Original line number Diff line number Diff line Loading @@ -120,7 +120,7 @@ void tnlChunkedEllpackMatrix< Real, Device, Index >::resolveSliceSizes( const tn sliceSize++; row++; if( allocatedElementsInSlice < desiredElementsInSlice ) if( row < this->rows - 1 && sliceSize < chunksInSlice ) continue; if( row < this->rows && sliceSize < chunksInSlice ) continue; tnlAssert( sliceSize >0, ); this->slices[ numberOfSlices ].size = sliceSize; this->slices[ numberOfSlices ].firstRow = row - sliceSize; Loading Loading @@ -153,20 +153,27 @@ bool tnlChunkedEllpackMatrix< Real, Device, Index >::setSlice( const RowLengthsV IndexType freeChunks = this->chunksInSlice - sliceSize; for( IndexType i = sliceBegin; i < sliceEnd; i++ ) this->rowToChunkMapping.setElement( i, 1 ); while( freeChunks ) { for( IndexType i = sliceBegin; i < sliceEnd && freeChunks > 0; i++ ) int totalAddedChunks( 0 ); int maxRowLength( rowLengths[ sliceBegin ] ); for( IndexType i = sliceBegin; i < sliceEnd; i++ ) { RealType rowRatio( 0.0 ); double rowRatio( 0.0 ); if( allocatedElementsInSlice != 0 ) rowRatio = ( RealType ) rowLengths[ i ] / ( RealType ) allocatedElementsInSlice; const IndexType addedChunks = ceil( freeChunks * rowRatio ); freeChunks -= addedChunks; rowRatio = ( double ) rowLengths[ i ] / ( double ) allocatedElementsInSlice; const IndexType addedChunks = freeChunks * rowRatio; totalAddedChunks += addedChunks; this->rowToChunkMapping[ i ] += addedChunks; tnlAssert( rowToChunkMapping[ i ] > 0, cerr << " rowToChunkMapping[ i ] = " << rowToChunkMapping[ i ] << endl ); if( maxRowLength < rowLengths[ i ] ) maxRowLength = rowLengths[ i ]; } tnlAssert( freeChunks >= 0, ); freeChunks -= totalAddedChunks; while( freeChunks ) for( IndexType i = sliceBegin; i < sliceEnd && freeChunks; i++ ) if( rowLengths[ i ] == maxRowLength ) { this->rowToChunkMapping[ i ]++; freeChunks--; } /**** Loading Loading @@ -283,6 +290,7 @@ template< typename Real, void tnlChunkedEllpackMatrix< Real, Device, Index >::reset() { tnlSparseMatrix< Real, Device, Index >::reset(); this->slices.reset(); this->rowToChunkMapping.reset(); this->rowToSliceMapping.reset(); } Loading Loading @@ -677,7 +685,7 @@ bool tnlChunkedEllpackMatrix< Real, Device, Index >::setRow( const IndexType row cerr << " row = " << row << " this->rows = " << this->rows ); const IndexType& sliceIndex = rowToSliceMapping.getElement( row ); const IndexType sliceIndex = rowToSliceMapping.getElement( row ); tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices.getElement( sliceIndex ).firstRow ) Loading Loading @@ -777,7 +785,7 @@ template< typename Real, Real tnlChunkedEllpackMatrix< Real, Device, Index >::getElementFast( const IndexType row, const IndexType column ) const { const IndexType& sliceIndex = rowToSliceMapping[ row ]; const IndexType sliceIndex = rowToSliceMapping[ row ]; tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices[ sliceIndex ].firstRow ) Loading Loading @@ -998,7 +1006,7 @@ typename Vector::RealType tnlChunkedEllpackMatrix< Real, Device, Index >::rowVec /*tnlAssert( row >=0 && row < this->rows, cerr << " row = " << row << " this->rows = " << this->rows );*/ const IndexType& sliceIndex = rowToSliceMapping[ row ]; const IndexType sliceIndex = rowToSliceMapping[ row ]; //tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices[ sliceIndex ].firstRow ) Loading Loading @@ -1066,12 +1074,12 @@ __device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVect if( threadIdx.x == 0 ) ( *sliceInfo ) = this->slices[ sliceIdx ]; __syncthreads; __syncthreads(); chunkProducts[ threadIdx.x ] = this->chunkVectorProduct( sliceInfo->pointer, threadIdx.x, sliceInfo->chunkSize, *inVector ); __syncthreads; __syncthreads(); if( threadIdx.x < sliceInfo->size ) { const IndexType row = sliceInfo->firstRow + threadIdx.x; Loading Loading @@ -1293,7 +1301,7 @@ class tnlChunkedEllpackMatrixDeviceDependentCode< tnlHost > Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector.setElement( row, matrix.rowVectorProduct( row, inVector ) ); outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; Loading src/implementation/matrices/tnlEllpackMatrix_impl.h +25 −9 Original line number Diff line number Diff line Loading @@ -462,10 +462,9 @@ template< typename Real, typename Vector::RealType tnlEllpackMatrix< Real, Device, Index >::rowVectorProduct( const IndexType row, const Vector& vector ) const { typedef tnlEllpackMatrixDeviceDependentCode< DeviceType > DDCType; IndexType i = DDCType::getRowBegin( *this, row ); const IndexType rowEnd = DDCType::getRowEnd( *this, row ); const IndexType step = DDCType::getElementStep( *this ); IndexType i = DeviceDependentCode::getRowBegin( *this, row ); const IndexType rowEnd = DeviceDependentCode::getRowEnd( *this, row ); const IndexType step = DeviceDependentCode::getElementStep( *this ); Real result = 0.0; while( i < rowEnd && this->columnIndexes[ i ] < this->columns ) Loading @@ -484,11 +483,7 @@ template< typename Real, void tnlEllpackMatrix< Real, Device, Index >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -657,6 +652,17 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost > { return 1; } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; template<> Loading Loading @@ -697,6 +703,16 @@ class tnlEllpackMatrixDeviceDependentCode< tnlCuda > { return matrix.alignedRows; } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { tnlMatrixVectorProductCuda( matrix, inVector, outVector ); } }; Loading src/implementation/matrices/tnlMatrixReader_impl.h +15 −0 Original line number Diff line number Diff line Loading @@ -25,6 +25,21 @@ using namespace std; template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( const tnlString& fileName, Matrix& matrix, bool verbose ) { fstream file; file.open( fileName.getString(), ios::in ); if( ! file ) { cerr << "I am not able to open the file " << fileName << "." << endl; return false; } return readMtxFile( file, matrix, verbose ); } template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, Loading src/implementation/matrices/tnlSlicedEllpackMatrix_impl.h +24 −5 Original line number Diff line number Diff line Loading @@ -496,11 +496,7 @@ template< typename Real, void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -706,6 +702,18 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > } matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 ); } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; #ifdef HAVE_CUDA Loading Loading @@ -811,6 +819,17 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > checkCudaDevice; #endif } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { tnlMatrixVectorProductCuda( matrix, inVector, outVector ); } }; Loading Loading
src/implementation/matrices/tnlCSRMatrix_impl.h +2 −2 Original line number Diff line number Diff line Loading @@ -370,8 +370,8 @@ typename Vector::RealType tnlCSRMatrix< Real, Device, Index >::rowVectorProduct( const IndexType rowEnd = this->rowPointers[ row + 1 ]; while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < this->columns ) { const Index column = this->columnIndexes.getElement( elementPtr ); result += this->values.getElement( elementPtr++ ) * vector.getElement( column ); const Index column = this->columnIndexes[ elementPtr ]; result += this->values[ elementPtr++ ] * vector[ column ]; } return result; } Loading
src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +28 −20 Original line number Diff line number Diff line Loading @@ -120,7 +120,7 @@ void tnlChunkedEllpackMatrix< Real, Device, Index >::resolveSliceSizes( const tn sliceSize++; row++; if( allocatedElementsInSlice < desiredElementsInSlice ) if( row < this->rows - 1 && sliceSize < chunksInSlice ) continue; if( row < this->rows && sliceSize < chunksInSlice ) continue; tnlAssert( sliceSize >0, ); this->slices[ numberOfSlices ].size = sliceSize; this->slices[ numberOfSlices ].firstRow = row - sliceSize; Loading Loading @@ -153,20 +153,27 @@ bool tnlChunkedEllpackMatrix< Real, Device, Index >::setSlice( const RowLengthsV IndexType freeChunks = this->chunksInSlice - sliceSize; for( IndexType i = sliceBegin; i < sliceEnd; i++ ) this->rowToChunkMapping.setElement( i, 1 ); while( freeChunks ) { for( IndexType i = sliceBegin; i < sliceEnd && freeChunks > 0; i++ ) int totalAddedChunks( 0 ); int maxRowLength( rowLengths[ sliceBegin ] ); for( IndexType i = sliceBegin; i < sliceEnd; i++ ) { RealType rowRatio( 0.0 ); double rowRatio( 0.0 ); if( allocatedElementsInSlice != 0 ) rowRatio = ( RealType ) rowLengths[ i ] / ( RealType ) allocatedElementsInSlice; const IndexType addedChunks = ceil( freeChunks * rowRatio ); freeChunks -= addedChunks; rowRatio = ( double ) rowLengths[ i ] / ( double ) allocatedElementsInSlice; const IndexType addedChunks = freeChunks * rowRatio; totalAddedChunks += addedChunks; this->rowToChunkMapping[ i ] += addedChunks; tnlAssert( rowToChunkMapping[ i ] > 0, cerr << " rowToChunkMapping[ i ] = " << rowToChunkMapping[ i ] << endl ); if( maxRowLength < rowLengths[ i ] ) maxRowLength = rowLengths[ i ]; } tnlAssert( freeChunks >= 0, ); freeChunks -= totalAddedChunks; while( freeChunks ) for( IndexType i = sliceBegin; i < sliceEnd && freeChunks; i++ ) if( rowLengths[ i ] == maxRowLength ) { this->rowToChunkMapping[ i ]++; freeChunks--; } /**** Loading Loading @@ -283,6 +290,7 @@ template< typename Real, void tnlChunkedEllpackMatrix< Real, Device, Index >::reset() { tnlSparseMatrix< Real, Device, Index >::reset(); this->slices.reset(); this->rowToChunkMapping.reset(); this->rowToSliceMapping.reset(); } Loading Loading @@ -677,7 +685,7 @@ bool tnlChunkedEllpackMatrix< Real, Device, Index >::setRow( const IndexType row cerr << " row = " << row << " this->rows = " << this->rows ); const IndexType& sliceIndex = rowToSliceMapping.getElement( row ); const IndexType sliceIndex = rowToSliceMapping.getElement( row ); tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices.getElement( sliceIndex ).firstRow ) Loading Loading @@ -777,7 +785,7 @@ template< typename Real, Real tnlChunkedEllpackMatrix< Real, Device, Index >::getElementFast( const IndexType row, const IndexType column ) const { const IndexType& sliceIndex = rowToSliceMapping[ row ]; const IndexType sliceIndex = rowToSliceMapping[ row ]; tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices[ sliceIndex ].firstRow ) Loading Loading @@ -998,7 +1006,7 @@ typename Vector::RealType tnlChunkedEllpackMatrix< Real, Device, Index >::rowVec /*tnlAssert( row >=0 && row < this->rows, cerr << " row = " << row << " this->rows = " << this->rows );*/ const IndexType& sliceIndex = rowToSliceMapping[ row ]; const IndexType sliceIndex = rowToSliceMapping[ row ]; //tnlAssert( sliceIndex < this->rows, ); IndexType chunkIndex( 0 ); if( row != slices[ sliceIndex ].firstRow ) Loading Loading @@ -1066,12 +1074,12 @@ __device__ void tnlChunkedEllpackMatrix< Real, Device, Index >::computeSliceVect if( threadIdx.x == 0 ) ( *sliceInfo ) = this->slices[ sliceIdx ]; __syncthreads; __syncthreads(); chunkProducts[ threadIdx.x ] = this->chunkVectorProduct( sliceInfo->pointer, threadIdx.x, sliceInfo->chunkSize, *inVector ); __syncthreads; __syncthreads(); if( threadIdx.x < sliceInfo->size ) { const IndexType row = sliceInfo->firstRow + threadIdx.x; Loading Loading @@ -1293,7 +1301,7 @@ class tnlChunkedEllpackMatrixDeviceDependentCode< tnlHost > Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector.setElement( row, matrix.rowVectorProduct( row, inVector ) ); outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; Loading
src/implementation/matrices/tnlEllpackMatrix_impl.h +25 −9 Original line number Diff line number Diff line Loading @@ -462,10 +462,9 @@ template< typename Real, typename Vector::RealType tnlEllpackMatrix< Real, Device, Index >::rowVectorProduct( const IndexType row, const Vector& vector ) const { typedef tnlEllpackMatrixDeviceDependentCode< DeviceType > DDCType; IndexType i = DDCType::getRowBegin( *this, row ); const IndexType rowEnd = DDCType::getRowEnd( *this, row ); const IndexType step = DDCType::getElementStep( *this ); IndexType i = DeviceDependentCode::getRowBegin( *this, row ); const IndexType rowEnd = DeviceDependentCode::getRowEnd( *this, row ); const IndexType step = DeviceDependentCode::getElementStep( *this ); Real result = 0.0; while( i < rowEnd && this->columnIndexes[ i ] < this->columns ) Loading @@ -484,11 +483,7 @@ template< typename Real, void tnlEllpackMatrix< Real, Device, Index >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -657,6 +652,17 @@ class tnlEllpackMatrixDeviceDependentCode< tnlHost > { return 1; } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; template<> Loading Loading @@ -697,6 +703,16 @@ class tnlEllpackMatrixDeviceDependentCode< tnlCuda > { return matrix.alignedRows; } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { tnlMatrixVectorProductCuda( matrix, inVector, outVector ); } }; Loading
src/implementation/matrices/tnlMatrixReader_impl.h +15 −0 Original line number Diff line number Diff line Loading @@ -25,6 +25,21 @@ using namespace std; template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( const tnlString& fileName, Matrix& matrix, bool verbose ) { fstream file; file.open( fileName.getString(), ios::in ); if( ! file ) { cerr << "I am not able to open the file " << fileName << "." << endl; return false; } return readMtxFile( file, matrix, verbose ); } template< typename Matrix > bool tnlMatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, Loading
src/implementation/matrices/tnlSlicedEllpackMatrix_impl.h +24 −5 Original line number Diff line number Diff line Loading @@ -496,11 +496,7 @@ template< typename Real, void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -706,6 +702,18 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > } matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 ); } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; #ifdef HAVE_CUDA Loading Loading @@ -811,6 +819,17 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > checkCudaDevice; #endif } template< typename Real, typename Index, typename Vector > static void vectorProduct( const tnlSlicedEllpackMatrix< Real, Device, Index >& matrix, const Vector& inVector, Vector& outVector ) { tnlMatrixVectorProductCuda( matrix, inVector, outVector ); } }; Loading