Loading src/implementation/core/cuda/cuda-prefix-sum_impl.h +9 −8 Original line number Diff line number Diff line Loading @@ -224,11 +224,11 @@ bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, input, output, auxArray1 ); cudaError error = cudaGetLastError(); if( error != cudaSuccess ) if( ! checkCudaDevice ) { cerr << "The CUDA kernel 'cudaFirstPhaseBlockPrefixSum' ended with error code " << error << "." << endl; cerr << "The CUDA kernel 'cudaFirstPhaseBlockPrefixSum' ended with error." << endl; cudaFree( auxArray1 ); cudaFree( auxArray2 ); return false; } Loading @@ -251,11 +251,12 @@ bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, cudaSecondPhaseBlockPrefixSum< DataType, Operation, Index > <<< cudaGridSize, cudaBlockSize >>> ( operation, size, elementsInBlock, gridShift, auxArray2, output ); error = cudaGetLastError(); if( error != cudaSuccess ) if( ! checkCudaDevice ) { cerr << "The CUDA kernel 'cudaSecondPhaseBlockPrefixSum' ended with error code " << error << "." << endl; cerr << "The CUDA kernel 'cudaSecondPhaseBlockPrefixSum' ended with error." << endl; cudaFree( auxArray1 ); cudaFree( auxArray2 ); return false; } cudaFree( auxArray1 ); Loading src/implementation/matrices/tnlSlicedEllpackMatrix_impl.h +189 −94 Original line number Diff line number Diff line Loading @@ -76,18 +76,11 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setRowLengths( co ! this->slicePointers.setSize( slices + 1 ) ) return false; /**** * Compute maximal row length in each slice */ DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths ); /**** * Compute the slice pointers using the exclusive prefix sum */ this->slicePointers.setElement( slices, 0 ); this->slicePointers.computeExclusivePrefixSum(); return this->allocateMatrixElements( this->slicePointers[ slices ] ); return this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } template< typename Real, Loading Loading @@ -170,7 +163,7 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setElementFast( c const IndexType column, const Real& value ) { return this->addElement( row, column, value, 0.0 ); return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, Loading Loading @@ -204,12 +197,10 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElementFast( c << " this->rows = " << this->rows << " this->columns = " << this-> columns ); const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd( elementPtr + rowLength ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr++; Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr += step; if( elementPtr == rowEnd ) return false; if( this->columnIndexes[ elementPtr ] == column ) Loading @@ -226,12 +217,12 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElementFast( c } else { IndexType j = rowEnd - 1; IndexType j = rowEnd - step; while( j > elementPtr ) { this->columnIndexes[ j ] = this->columnIndexes[ j - 1 ]; this->values[ j ] = this->values[ j - 1 ]; j--; this->columnIndexes[ j ] = this->columnIndexes[ j - step ]; this->values[ j ] = this->values[ j - step ]; j -= step; } this->columnIndexes[ elementPtr ] = column; this->values[ elementPtr ] = value; Loading @@ -249,7 +240,45 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElement( const const RealType& value, const RealType& thisElementMultiplicator ) { return this->addElementFast( row, column, value, thisElementMultiplicator ); tnlAssert( row >= 0 && row < this->rows && column >= 0 && column <= this->rows, cerr << " row = " << row << " column = " << column << " this->rows = " << this->rows << " this->columns = " << this-> columns ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) < column ) elementPtr += step; if( elementPtr == rowEnd ) return false; if( this->columnIndexes.getElement( elementPtr ) == column ) { this->values.setElement( elementPtr, thisElementMultiplicator * this->values.getElement( elementPtr ) + value ); return true; } else if( this->columnIndexes.getElement( elementPtr ) == this->columns ) { this->columnIndexes.setElement( elementPtr, column ); this->values.setElement( elementPtr, value ); return true; } else { IndexType j = rowEnd - step; while( j > elementPtr ) { this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) ); this->values.setElement( j, this->values.getElement( j - step ) ); j -= step; } this->columnIndexes.setElement( elementPtr, column ); this->values.setElement( elementPtr, value ); return true; } return false; } template< typename Real, Loading @@ -269,16 +298,20 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRowFast( con if( elements > rowLength ) return false; IndexType elementPointer = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); Index elementPointer, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step ); for( IndexType i = 0; i < elements; i++ ) { this->columnIndexes[ elementPointer ] = columnIndexes[ i ]; this->values[ elementPointer ] = values[ i ]; elementPointer++; elementPointer += step; } for( IndexType i = elements; i < rowLength; i++ ) this->columnIndexes[ elementPointer++ ] = this->getColumns(); { this->columnIndexes[ elementPointer ] = this->getColumns(); elementPointer += step; } return true; } Loading @@ -291,7 +324,26 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRow( const I const RealType* values, const IndexType elements ) { return this->setRowFast( row, columnIndexes, values, elements ); const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths.getElement( sliceIdx ); if( elements > rowLength ) return false; Index elementPointer, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step ); for( IndexType i = 0; i < elements; i++ ) { this->columnIndexes.setElement( elementPointer, this->columnIndexes.getElement( i ) ); this->values.setElement( elementPointer, this->values.getElement( i ) ); elementPointer += step; } for( IndexType i = elements; i < rowLength; i++ ) { this->columnIndexes.setElement( elementPointer, this->getColumns() ); elementPointer += step; } return true; } template< typename Real, Loading Loading @@ -335,13 +387,11 @@ template< typename Real, Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElementFast( const IndexType row, const IndexType column ) const { const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd = elementPtr + rowLength; Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr++; elementPtr += step; if( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] == column ) return this->values[ elementPtr ]; return 0.0; Loading @@ -354,7 +404,15 @@ template< typename Real, Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElement( const IndexType row, const IndexType column ) const { return this->getElementFast( row, column ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) < column ) elementPtr += step; if( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) == column ) return this->values.getElement( elementPtr ); return 0.0; } template< typename Real, Loading @@ -368,15 +426,15 @@ void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRowFast( const IndexType* columns, RealType* values ) const { const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); for( IndexType i = 0; i < rowLength; i++ ) Index elementPtr, rowEnd, step, i( 0 ); DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd ) { columns[ i ] = this->columnIndexes[ elementPtr ]; values[ i ] = this->values[ elementPtr ]; elementPtr++; elementPtr += step; i++; } } Loading @@ -388,33 +446,55 @@ void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRow( const Ind IndexType* columns, RealType* values ) const { return this->getRowFast( row, columns, values ); } Index elementPtr, rowEnd, step, i( 0 ); DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd ) { columns[ i ] = this->columnIndexes.getElement( elementPtr ); values[ i ] = this->values.getElement( elementPtr ); elementPtr += step; i++; } } template< typename Real, typename Device, typename Index, int SliceSize > template< typename Vector > void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { for( Index row = 0; row < this->getRows(); row ++ ) #ifdef HAVE_CUDA __device__ __host__ #endif typename Vector::RealType tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::rowVectorProduct( const IndexType row, const Vector& vector ) const { Real result = 0.0; const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd( elementPtr + rowLength ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < this->columns ) { const Index column = this->columnIndexes.getElement( elementPtr ); result += this->values.getElement( elementPtr++ ) * inVector.getElement( column ); const Index column = this->columnIndexes[ elementPtr ]; result += this->values[ elementPtr ] * vector[ column ]; elementPtr += step; } outVector.setElement( row, result ); return result; } template< typename Real, typename Device, typename Index, int SliceSize > template< typename Vector > void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -562,29 +642,40 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > template< typename Real, typename Index, int SliceSize > static Index getRowBegin( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return row * matrix.rowLengths; } const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers.getElement( sliceIdx ); const Index rowLength = matrix.sliceRowLengths.getElement( sliceIdx ); template< typename Real, typename Index, int SliceSize > static Index getRowEnd( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) { return ( row + 1 ) * matrix.rowLengths; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, typename Index, int SliceSize > static Index getElementStep( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix ) static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return 1; const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers[ sliceIdx ]; const Index rowLength = matrix.sliceRowLengths[ sliceIdx ]; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, typename Index, int SliceSize > Loading @@ -607,6 +698,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > matrix.sliceRowLengths.setElement( slice, sliceRowLength ); matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize ); } matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 ); } }; Loading @@ -622,16 +714,18 @@ __global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKerne Index rowIdx = sliceIdx * SliceSize; Index rowInSliceIdx( 0 ); Index maxRowLength( 0 ); if( rowIdx >= matrix->getRows() ) return; while( rowInSliceIdx < SliceSize && rowIdx < matrix->getRows() ) { printf( "sliceIdx = %d rowInSliceIdx = %d SliceSize = %d rowIdx = %d matrix->getRows() = %d \n", sliceIdx, rowInSliceIdx, SliceSize, rowIdx, matrix->getRows() ); maxRowLength = Max( maxRowLength, rowLengths[ rowIdx ] ); printf( "threadIdx.x = %d, maxRowLength = %d \n", threadIdx.x, maxRowLength ); maxRowLength = Max( maxRowLength, ( *rowLengths )[ rowIdx ] ); rowIdx++; rowInSliceIdx++; } //matrix->sliceRowLengths[ sliceIdx ] = maxRowLength; //matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize; matrix->sliceRowLengths[ sliceIdx ] = maxRowLength; matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize; if( threadIdx.x == 0 ) matrix->slicePointers[ matrix->slicePointers.getSize() - 1 ] = 0; } #endif Loading @@ -645,25 +739,19 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > template< typename Real, typename Index, int SliceSize > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getRowBegin( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return row * matrix.rowLengths; } const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers.getElement( sliceIdx ); const Index rowLength = matrix.sliceRowLengths.getElement( sliceIdx ); template< typename Real, typename Index, int SliceSize > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getRowEnd( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) { return ( row + 1 ) * matrix.rowLengths; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, Loading @@ -672,9 +760,19 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getElementStep( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix ) static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return 1; const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers[ sliceIdx ]; const Index rowLength = matrix.sliceRowLengths[ sliceIdx ]; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, Loading @@ -692,7 +790,6 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); const Index cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() ); cout << rowLengths << endl; for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) Loading @@ -705,8 +802,6 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > tnlCuda::freeFromDevice( kernel_matrix ); tnlCuda::freeFromDevice( kernel_rowLengths ); checkCudaDevice; cout << rowLengths << endl; cout << matrix.slicePointers << endl << matrix.sliceRowLengths << endl; #endif } }; Loading tests/unit-tests/matrices/tnlSparseMatrixTester.h +44 −14 Original line number Diff line number Diff line Loading @@ -51,8 +51,6 @@ __global__ void tnlSparseMatrixTester__setElementFast_LowerTriangularMatrixTestC template< typename MatrixType > __global__ void tnlSparseMatrixTester__setElementFast_LowerTriangularMatrixTestCudaKernel2( MatrixType* matrix, bool* testResult ); #endif Loading Loading @@ -92,6 +90,7 @@ class tnlSparseMatrixTester : public CppUnit :: TestCase suiteOfTests->addTest( new TestCallerType( "setElementFast_DenseMatrixTest", &TesterType::setElementFast_DenseMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setElement_LowerTriangularMatrixTest", &TesterType::setElement_LowerTriangularMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setElementFast_LowerTriangularMatrixTest", &TesterType::setElementFast_LowerTriangularMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setRow_DiagonalMatrixTest", &TesterType::setRow_DiagonalMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "addElementTest", &TesterType::addElementTest ) ); suiteOfTests->addTest( new TestCallerType( "vectorProductTest", &TesterType::vectorProductTest ) ); /*suiteOfTests -> addTest( new TestCallerType( "matrixTranspositionTest", &TesterType::matrixTranspositionTest ) ); Loading Loading @@ -497,6 +496,37 @@ class tnlSparseMatrixTester : public CppUnit :: TestCase CPPUNIT_ASSERT( m.getElement( i, j ) == 0 ); } void setRow_DiagonalMatrixTest() { MatrixType m; m.setDimensions( 10, 10 ); IndexVector rowLengths; rowLengths.setSize( m.getRows() ); rowLengths.setValue( 7 ); m.setRowLengths( rowLengths ); RealType values[ 1 ]; IndexType columnIndexes[ 1 ]; for( int i = 0; i < 10; i++ ) { values[ 0 ] = i; columnIndexes[ 0 ] = i; m.setRow( i, columnIndexes, values, 1 ); } for( int i = 0; i < 10; i++ ) { for( int j = 0; j < 10; j++ ) { if( i == j ) CPPUNIT_ASSERT( m.getElement( i, j ) == i ); else CPPUNIT_ASSERT( m.getElement( i, j ) == 0 ); } } } void vectorProductTest() { const int size = 10; Loading Loading
src/implementation/core/cuda/cuda-prefix-sum_impl.h +9 −8 Original line number Diff line number Diff line Loading @@ -224,11 +224,11 @@ bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, input, output, auxArray1 ); cudaError error = cudaGetLastError(); if( error != cudaSuccess ) if( ! checkCudaDevice ) { cerr << "The CUDA kernel 'cudaFirstPhaseBlockPrefixSum' ended with error code " << error << "." << endl; cerr << "The CUDA kernel 'cudaFirstPhaseBlockPrefixSum' ended with error." << endl; cudaFree( auxArray1 ); cudaFree( auxArray2 ); return false; } Loading @@ -251,11 +251,12 @@ bool cudaRecursivePrefixSum( const enumPrefixSumType prefixSumType, cudaSecondPhaseBlockPrefixSum< DataType, Operation, Index > <<< cudaGridSize, cudaBlockSize >>> ( operation, size, elementsInBlock, gridShift, auxArray2, output ); error = cudaGetLastError(); if( error != cudaSuccess ) if( ! checkCudaDevice ) { cerr << "The CUDA kernel 'cudaSecondPhaseBlockPrefixSum' ended with error code " << error << "." << endl; cerr << "The CUDA kernel 'cudaSecondPhaseBlockPrefixSum' ended with error." << endl; cudaFree( auxArray1 ); cudaFree( auxArray2 ); return false; } cudaFree( auxArray1 ); Loading
src/implementation/matrices/tnlSlicedEllpackMatrix_impl.h +189 −94 Original line number Diff line number Diff line Loading @@ -76,18 +76,11 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setRowLengths( co ! this->slicePointers.setSize( slices + 1 ) ) return false; /**** * Compute maximal row length in each slice */ DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths ); /**** * Compute the slice pointers using the exclusive prefix sum */ this->slicePointers.setElement( slices, 0 ); this->slicePointers.computeExclusivePrefixSum(); return this->allocateMatrixElements( this->slicePointers[ slices ] ); return this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } template< typename Real, Loading Loading @@ -170,7 +163,7 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::setElementFast( c const IndexType column, const Real& value ) { return this->addElement( row, column, value, 0.0 ); return this->addElementFast( row, column, value, 0.0 ); } template< typename Real, Loading Loading @@ -204,12 +197,10 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElementFast( c << " this->rows = " << this->rows << " this->columns = " << this-> columns ); const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd( elementPtr + rowLength ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr++; Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr += step; if( elementPtr == rowEnd ) return false; if( this->columnIndexes[ elementPtr ] == column ) Loading @@ -226,12 +217,12 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElementFast( c } else { IndexType j = rowEnd - 1; IndexType j = rowEnd - step; while( j > elementPtr ) { this->columnIndexes[ j ] = this->columnIndexes[ j - 1 ]; this->values[ j ] = this->values[ j - 1 ]; j--; this->columnIndexes[ j ] = this->columnIndexes[ j - step ]; this->values[ j ] = this->values[ j - step ]; j -= step; } this->columnIndexes[ elementPtr ] = column; this->values[ elementPtr ] = value; Loading @@ -249,7 +240,45 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::addElement( const const RealType& value, const RealType& thisElementMultiplicator ) { return this->addElementFast( row, column, value, thisElementMultiplicator ); tnlAssert( row >= 0 && row < this->rows && column >= 0 && column <= this->rows, cerr << " row = " << row << " column = " << column << " this->rows = " << this->rows << " this->columns = " << this-> columns ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) < column ) elementPtr += step; if( elementPtr == rowEnd ) return false; if( this->columnIndexes.getElement( elementPtr ) == column ) { this->values.setElement( elementPtr, thisElementMultiplicator * this->values.getElement( elementPtr ) + value ); return true; } else if( this->columnIndexes.getElement( elementPtr ) == this->columns ) { this->columnIndexes.setElement( elementPtr, column ); this->values.setElement( elementPtr, value ); return true; } else { IndexType j = rowEnd - step; while( j > elementPtr ) { this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) ); this->values.setElement( j, this->values.getElement( j - step ) ); j -= step; } this->columnIndexes.setElement( elementPtr, column ); this->values.setElement( elementPtr, value ); return true; } return false; } template< typename Real, Loading @@ -269,16 +298,20 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRowFast( con if( elements > rowLength ) return false; IndexType elementPointer = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); Index elementPointer, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step ); for( IndexType i = 0; i < elements; i++ ) { this->columnIndexes[ elementPointer ] = columnIndexes[ i ]; this->values[ elementPointer ] = values[ i ]; elementPointer++; elementPointer += step; } for( IndexType i = elements; i < rowLength; i++ ) this->columnIndexes[ elementPointer++ ] = this->getColumns(); { this->columnIndexes[ elementPointer ] = this->getColumns(); elementPointer += step; } return true; } Loading @@ -291,7 +324,26 @@ bool tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize > :: setRow( const I const RealType* values, const IndexType elements ) { return this->setRowFast( row, columnIndexes, values, elements ); const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths.getElement( sliceIdx ); if( elements > rowLength ) return false; Index elementPointer, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPointer, rowEnd, step ); for( IndexType i = 0; i < elements; i++ ) { this->columnIndexes.setElement( elementPointer, this->columnIndexes.getElement( i ) ); this->values.setElement( elementPointer, this->values.getElement( i ) ); elementPointer += step; } for( IndexType i = elements; i < rowLength; i++ ) { this->columnIndexes.setElement( elementPointer, this->getColumns() ); elementPointer += step; } return true; } template< typename Real, Loading Loading @@ -335,13 +387,11 @@ template< typename Real, Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElementFast( const IndexType row, const IndexType column ) const { const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd = elementPtr + rowLength; Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < column ) elementPtr++; elementPtr += step; if( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] == column ) return this->values[ elementPtr ]; return 0.0; Loading @@ -354,7 +404,15 @@ template< typename Real, Real tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getElement( const IndexType row, const IndexType column ) const { return this->getElementFast( row, column ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) < column ) elementPtr += step; if( elementPtr < rowEnd && this->columnIndexes.getElement( elementPtr ) == column ) return this->values.getElement( elementPtr ); return 0.0; } template< typename Real, Loading @@ -368,15 +426,15 @@ void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRowFast( const IndexType* columns, RealType* values ) const { const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); for( IndexType i = 0; i < rowLength; i++ ) Index elementPtr, rowEnd, step, i( 0 ); DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd ) { columns[ i ] = this->columnIndexes[ elementPtr ]; values[ i ] = this->values[ elementPtr ]; elementPtr++; elementPtr += step; i++; } } Loading @@ -388,33 +446,55 @@ void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::getRow( const Ind IndexType* columns, RealType* values ) const { return this->getRowFast( row, columns, values ); } Index elementPtr, rowEnd, step, i( 0 ); DeviceDependentCode::initRowTraverse( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd ) { columns[ i ] = this->columnIndexes.getElement( elementPtr ); values[ i ] = this->values.getElement( elementPtr ); elementPtr += step; i++; } } template< typename Real, typename Device, typename Index, int SliceSize > template< typename Vector > void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { for( Index row = 0; row < this->getRows(); row ++ ) #ifdef HAVE_CUDA __device__ __host__ #endif typename Vector::RealType tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::rowVectorProduct( const IndexType row, const Vector& vector ) const { Real result = 0.0; const IndexType sliceIdx = row / SliceSize; const IndexType rowLength = this->sliceRowLengths[ sliceIdx ]; IndexType elementPtr = this->slicePointers[ sliceIdx ] + rowLength * ( row - sliceIdx * SliceSize ); const IndexType rowEnd( elementPtr + rowLength ); Index elementPtr, rowEnd, step; DeviceDependentCode::initRowTraverseFast( *this, row, elementPtr, rowEnd, step ); while( elementPtr < rowEnd && this->columnIndexes[ elementPtr ] < this->columns ) { const Index column = this->columnIndexes.getElement( elementPtr ); result += this->values.getElement( elementPtr++ ) * inVector.getElement( column ); const Index column = this->columnIndexes[ elementPtr ]; result += this->values[ elementPtr ] * vector[ column ]; elementPtr += step; } outVector.setElement( row, result ); return result; } template< typename Real, typename Device, typename Index, int SliceSize > template< typename Vector > void tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >::vectorProduct( const Vector& inVector, Vector& outVector ) const { if( DeviceType::getDevice() == tnlHostDevice ) for( Index row = 0; row < this->getRows(); row ++ ) outVector[ row ] = this->rowVectorProduct( row, inVector); if( DeviceType::getDevice() == tnlCudaDevice ) tnlMatrixVectorProductCuda( *this, inVector, outVector ); } template< typename Real, Loading Loading @@ -562,29 +642,40 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > template< typename Real, typename Index, int SliceSize > static Index getRowBegin( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return row * matrix.rowLengths; } const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers.getElement( sliceIdx ); const Index rowLength = matrix.sliceRowLengths.getElement( sliceIdx ); template< typename Real, typename Index, int SliceSize > static Index getRowEnd( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) { return ( row + 1 ) * matrix.rowLengths; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, typename Index, int SliceSize > static Index getElementStep( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix ) static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return 1; const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers[ sliceIdx ]; const Index rowLength = matrix.sliceRowLengths[ sliceIdx ]; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, typename Index, int SliceSize > Loading @@ -607,6 +698,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlHost > matrix.sliceRowLengths.setElement( slice, sliceRowLength ); matrix.slicePointers.setElement( slice++, sliceRowLength * SliceSize ); } matrix.slicePointers.setElement( matrix.slicePointers.getSize() - 1, 0 ); } }; Loading @@ -622,16 +714,18 @@ __global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKerne Index rowIdx = sliceIdx * SliceSize; Index rowInSliceIdx( 0 ); Index maxRowLength( 0 ); if( rowIdx >= matrix->getRows() ) return; while( rowInSliceIdx < SliceSize && rowIdx < matrix->getRows() ) { printf( "sliceIdx = %d rowInSliceIdx = %d SliceSize = %d rowIdx = %d matrix->getRows() = %d \n", sliceIdx, rowInSliceIdx, SliceSize, rowIdx, matrix->getRows() ); maxRowLength = Max( maxRowLength, rowLengths[ rowIdx ] ); printf( "threadIdx.x = %d, maxRowLength = %d \n", threadIdx.x, maxRowLength ); maxRowLength = Max( maxRowLength, ( *rowLengths )[ rowIdx ] ); rowIdx++; rowInSliceIdx++; } //matrix->sliceRowLengths[ sliceIdx ] = maxRowLength; //matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize; matrix->sliceRowLengths[ sliceIdx ] = maxRowLength; matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize; if( threadIdx.x == 0 ) matrix->slicePointers[ matrix->slicePointers.getSize() - 1 ] = 0; } #endif Loading @@ -645,25 +739,19 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > template< typename Real, typename Index, int SliceSize > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getRowBegin( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) static void initRowTraverse( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return row * matrix.rowLengths; } const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers.getElement( sliceIdx ); const Index rowLength = matrix.sliceRowLengths.getElement( sliceIdx ); template< typename Real, typename Index, int SliceSize > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getRowEnd( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row ) { return ( row + 1 ) * matrix.rowLengths; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, Loading @@ -672,9 +760,19 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > #ifdef HAVE_CUDA __device__ __host__ #endif static Index getElementStep( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix ) static void initRowTraverseFast( const tnlSlicedEllpackMatrix< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, Index& rowEnd, Index& step ) { return 1; const Index sliceIdx = row / SliceSize; const Index slicePointer = matrix.slicePointers[ sliceIdx ]; const Index rowLength = matrix.sliceRowLengths[ sliceIdx ]; rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); rowEnd = rowBegin + rowLength; step = 1; } template< typename Real, Loading @@ -692,7 +790,6 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); const Index cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() ); cout << rowLengths << endl; for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) { if( gridIdx == cudaGrids - 1 ) Loading @@ -705,8 +802,6 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda > tnlCuda::freeFromDevice( kernel_matrix ); tnlCuda::freeFromDevice( kernel_rowLengths ); checkCudaDevice; cout << rowLengths << endl; cout << matrix.slicePointers << endl << matrix.sliceRowLengths << endl; #endif } }; Loading
tests/unit-tests/matrices/tnlSparseMatrixTester.h +44 −14 Original line number Diff line number Diff line Loading @@ -51,8 +51,6 @@ __global__ void tnlSparseMatrixTester__setElementFast_LowerTriangularMatrixTestC template< typename MatrixType > __global__ void tnlSparseMatrixTester__setElementFast_LowerTriangularMatrixTestCudaKernel2( MatrixType* matrix, bool* testResult ); #endif Loading Loading @@ -92,6 +90,7 @@ class tnlSparseMatrixTester : public CppUnit :: TestCase suiteOfTests->addTest( new TestCallerType( "setElementFast_DenseMatrixTest", &TesterType::setElementFast_DenseMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setElement_LowerTriangularMatrixTest", &TesterType::setElement_LowerTriangularMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setElementFast_LowerTriangularMatrixTest", &TesterType::setElementFast_LowerTriangularMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "setRow_DiagonalMatrixTest", &TesterType::setRow_DiagonalMatrixTest ) ); suiteOfTests->addTest( new TestCallerType( "addElementTest", &TesterType::addElementTest ) ); suiteOfTests->addTest( new TestCallerType( "vectorProductTest", &TesterType::vectorProductTest ) ); /*suiteOfTests -> addTest( new TestCallerType( "matrixTranspositionTest", &TesterType::matrixTranspositionTest ) ); Loading Loading @@ -497,6 +496,37 @@ class tnlSparseMatrixTester : public CppUnit :: TestCase CPPUNIT_ASSERT( m.getElement( i, j ) == 0 ); } void setRow_DiagonalMatrixTest() { MatrixType m; m.setDimensions( 10, 10 ); IndexVector rowLengths; rowLengths.setSize( m.getRows() ); rowLengths.setValue( 7 ); m.setRowLengths( rowLengths ); RealType values[ 1 ]; IndexType columnIndexes[ 1 ]; for( int i = 0; i < 10; i++ ) { values[ 0 ] = i; columnIndexes[ 0 ] = i; m.setRow( i, columnIndexes, values, 1 ); } for( int i = 0; i < 10; i++ ) { for( int j = 0; j < 10; j++ ) { if( i == j ) CPPUNIT_ASSERT( m.getElement( i, j ) == i ); else CPPUNIT_ASSERT( m.getElement( i, j ) == 0 ); } } } void vectorProductTest() { const int size = 10; Loading