Loading src/implementation/matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ SET( headers tnlMatrix_impl.h tnlMatrixReader_impl.h tnlMatrixWriter_impl.h tnlMatrixSetter_impl.h tnlBiEllpackMatrix_impl.h ) tnlBiEllpackMatrix_impl.h tnlEllpackGraphMatrix_impl.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/matrices ) set( common_SOURCES Loading src/implementation/matrices/tnlEllpackGraphMatrix_impl.h 0 → 100644 +915 −0 File added.Preview size limit exceeded, changes collapsed. Show changes src/implementation/matrices/tnlEllpackSymMatrix_impl.h +11 −11 Original line number Diff line number Diff line Loading @@ -734,14 +734,14 @@ void tnlEllpackSymMatrix< Real, Device, Index >::spmvCuda( const InVector& inVec { IndexType i = DeviceDependentCode::getRowBegin( *this, rowId ); const IndexType rowEnd = DeviceDependentCode::getRowEnd( *this, rowId ); const IndexTpe step = DeviceDependentCode::getElementStep( *this ); const IndexType step = DeviceDependentCode::getElementStep( *this ); while( i < rowEnd && this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) while( i < rowEnd && this->columnIndexes[ i ] != this->getPaddingIndex() ) { const IndexType column = this->columnIndexes.getElemnt( i ); outVector[ rowId ] += this->values.getElement( i ) * inVector[ column ]; const IndexType column = this->columnIndexes[ i ]; outVector[ rowId ] += this->values[ i ] * inVector[ column ]; if( rowId != column ) outVector[ column ].add( this->values.getElement( i ) * inVector[ row ] ); outVector[ column ].add( this->values[ i ] * inVector[ rowId ] ); i += step; } }; Loading @@ -753,15 +753,15 @@ template< typename Real, typename InVector, typename OutVector > __global__ tnlEllpackSymMatrixVectorProductCuda< Real, tnlCuda, Index >( const tnlEllpackSymMatrix& matrix, const InVector& inVector, OutVector& outVector, void tnlEllpackSymMatrixVectorProductCuda( const tnlEllpackSymMatrix< Real, tnlCuda, Index >* matrix, const InVector* inVector, OutVector* outVector, const int gridIdx ) { int globalIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( globalIdx >= matrix->getRows() ) return; matrix->spmvCuda( inVector, outVector, globalIdx ); matrix->spmvCuda( *inVector, *outVector, globalIdx ); }; #endif Loading Loading @@ -827,7 +827,7 @@ class tnlEllpackSymMatrixDeviceDependentCode< tnlCuda > if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); tnlEllpackSymMatrixVectorProductCuda< Real, Index, StripSize, InVector, OutVector > tnlEllpackSymMatrixVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( kernel_this, kernel_inVector, Loading src/implementation/matrices/tnlMatrix_impl.h +8 −0 Original line number Diff line number Diff line Loading @@ -223,6 +223,14 @@ void tnlMatrix< Real, Device, Index >::print( ostream& str ) const { } template< typename Real, typename Device, typename Index > bool tnlMatrix< Real, Device, Index >::help() { return true; } #ifdef HAVE_CUDA template< typename Matrix, typename InVector, Loading src/matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ SET( headers tnlMatrix.h tnlMatrixReader.h tnlMatrixWriter.h tnlMatrixSetter.h tnlBiEllpackMatrix.h ) tnlBiEllpackMatrix.h tnlEllpackGraphMatrix.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/matrices ) set( common_SOURCES ) Loading Loading
src/implementation/matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ SET( headers tnlMatrix_impl.h tnlMatrixReader_impl.h tnlMatrixWriter_impl.h tnlMatrixSetter_impl.h tnlBiEllpackMatrix_impl.h ) tnlBiEllpackMatrix_impl.h tnlEllpackGraphMatrix_impl.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/implementation/matrices ) set( common_SOURCES Loading
src/implementation/matrices/tnlEllpackGraphMatrix_impl.h 0 → 100644 +915 −0 File added.Preview size limit exceeded, changes collapsed. Show changes
src/implementation/matrices/tnlEllpackSymMatrix_impl.h +11 −11 Original line number Diff line number Diff line Loading @@ -734,14 +734,14 @@ void tnlEllpackSymMatrix< Real, Device, Index >::spmvCuda( const InVector& inVec { IndexType i = DeviceDependentCode::getRowBegin( *this, rowId ); const IndexType rowEnd = DeviceDependentCode::getRowEnd( *this, rowId ); const IndexTpe step = DeviceDependentCode::getElementStep( *this ); const IndexType step = DeviceDependentCode::getElementStep( *this ); while( i < rowEnd && this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) while( i < rowEnd && this->columnIndexes[ i ] != this->getPaddingIndex() ) { const IndexType column = this->columnIndexes.getElemnt( i ); outVector[ rowId ] += this->values.getElement( i ) * inVector[ column ]; const IndexType column = this->columnIndexes[ i ]; outVector[ rowId ] += this->values[ i ] * inVector[ column ]; if( rowId != column ) outVector[ column ].add( this->values.getElement( i ) * inVector[ row ] ); outVector[ column ].add( this->values[ i ] * inVector[ rowId ] ); i += step; } }; Loading @@ -753,15 +753,15 @@ template< typename Real, typename InVector, typename OutVector > __global__ tnlEllpackSymMatrixVectorProductCuda< Real, tnlCuda, Index >( const tnlEllpackSymMatrix& matrix, const InVector& inVector, OutVector& outVector, void tnlEllpackSymMatrixVectorProductCuda( const tnlEllpackSymMatrix< Real, tnlCuda, Index >* matrix, const InVector* inVector, OutVector* outVector, const int gridIdx ) { int globalIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; if( globalIdx >= matrix->getRows() ) return; matrix->spmvCuda( inVector, outVector, globalIdx ); matrix->spmvCuda( *inVector, *outVector, globalIdx ); }; #endif Loading Loading @@ -827,7 +827,7 @@ class tnlEllpackSymMatrixDeviceDependentCode< tnlCuda > if( gridIdx == cudaGrids - 1 ) cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize(); const int sharedMemory = cudaBlockSize.x * sizeof( Real ); tnlEllpackSymMatrixVectorProductCuda< Real, Index, StripSize, InVector, OutVector > tnlEllpackSymMatrixVectorProductCuda< Real, Index, InVector, OutVector > <<< cudaGridSize, cudaBlockSize, sharedMemory >>> ( kernel_this, kernel_inVector, Loading
src/implementation/matrices/tnlMatrix_impl.h +8 −0 Original line number Diff line number Diff line Loading @@ -223,6 +223,14 @@ void tnlMatrix< Real, Device, Index >::print( ostream& str ) const { } template< typename Real, typename Device, typename Index > bool tnlMatrix< Real, Device, Index >::help() { return true; } #ifdef HAVE_CUDA template< typename Matrix, typename InVector, Loading
src/matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ SET( headers tnlMatrix.h tnlMatrixReader.h tnlMatrixWriter.h tnlMatrixSetter.h tnlBiEllpackMatrix.h ) tnlBiEllpackMatrix.h tnlEllpackGraphMatrix.h ) SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/matrices ) set( common_SOURCES ) Loading