From 190cc1a7c3579240746c5f95cf398669852132ae Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Wed, 16 Apr 2014 15:21:13 +0200 Subject: [PATCH] Implementing tridiagonal matrix in CUDA. --- .gitignore | 2 + CMakeLists.txt | 1 + install | 2 +- .../matrices/tnlTridiagonalMatrix_impl.h | 282 ++++++++++++++---- src/matrices/tnlTridiagonalMatrix.h | 35 ++- tests/unit-tests/CMakeLists.txt | 6 +- tests/unit-tests/matrices/CMakeLists.txt | 20 +- .../matrices/tnlTridiagonalMatrixTest.cu | 8 +- .../matrices/tnlTridiagonalMatrixTester.h | 20 +- 9 files changed, 276 insertions(+), 100 deletions(-) diff --git a/.gitignore b/.gitignore index de822c5284..5dae13a3d4 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ /Debug /Release +.settings + # /src/ /src/Makefile.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b7515f5f9..f0ac89d075 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ endif() if( WITH_CUDA STREQUAL "yes" ) AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" ) + set( CUDA_LINKER_OPTIONS "-arch sm_20 -shared" ) # TODO: fix this in the rest of cmake files else() AddCompilerFlag( "-std=gnu++0x" ) endif() diff --git a/install b/install index 72f4cc8fd6..62a2a978e2 100755 --- a/install +++ b/install @@ -2,7 +2,7 @@ TARGET=TNL INSTALL_PREFIX=${HOME}/local -WITH_CUDA=no +WITH_CUDA=yes WITH_CUSPARSE=no CUDA_ARCHITECTURE=2.0 TEMPLATE_EXPLICIT_INSTANTIATION=yes diff --git a/src/implementation/matrices/tnlTridiagonalMatrix_impl.h b/src/implementation/matrices/tnlTridiagonalMatrix_impl.h index 88b893db60..84e2b57603 100644 --- a/src/implementation/matrices/tnlTridiagonalMatrix_impl.h +++ b/src/implementation/matrices/tnlTridiagonalMatrix_impl.h @@ -21,6 +21,85 @@ #include <core/tnlAssert.h> #include <matrices/tnlTridiagonalMatrix.h> +template< typename Device > +class tnlTridiagonalMatrixDeviceDependentCode; + +template<> +class tnlTridiagonalMatrixDeviceDependentCode< tnlHost > +{ + public: + + template< typename Index > + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return 3*row + column - row; + } + + template< typename Vector, + typename Index, + typename ValuesType > + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ 1 ]; + Index i = 3 * row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i++ ] + + vector[ row ] * values[ i ]; + return vector[ row - 1 ] * values[ i++ ] + + vector[ row ] * values[ i++ ] + + vector[ row + 1 ] * values[ i ]; + } +}; + +template<> +class tnlTridiagonalMatrixDeviceDependentCode< tnlCuda > +{ + public: + + template< typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return ( column - row + 1 )*rows + row - 1; + } + + template< typename Vector, + typename Index, + typename ValuesType > +#ifdef HAVE_CUDA + __device__ +#endif + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ rows - 1 ]; + Index i = row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ]; + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ] + + vector[ row + 1 ] * values[ i + 2*rows ]; + } +}; + + + template< typename Real, typename Device, typename Index > @@ -58,6 +137,7 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::setDimensions( const IndexType if( ! values.setSize( 3*Min( rows, columns ) ) ) return false; this->values.setValue( 0.0 ); + return true; } template< typename Real, @@ -174,7 +254,7 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::setElementFast( const IndexTyp const IndexType column, const RealType& value ) { - this->values.setElement( this->getElementIndex( row, column ), value ); + this->values[ this->getElementIndex( row, column ) ] = value; return true; } @@ -189,7 +269,6 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::setElement( const IndexType ro return true; } - template< typename Real, typename Device, typename Index > @@ -201,7 +280,8 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::addElementFast( const IndexTyp const RealType& value, const RealType& thisElementMultiplicator ) { - this->values.addElement( this->getElementIndex( row, column ), value, thisElementMultiplicator ); + const Index i = this->getElementIndex( row, column ); + this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; } template< typename Real, @@ -212,10 +292,10 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::addElement( const IndexType ro const RealType& value, const RealType& thisElementMultiplicator ) { - //this->values.addElement( this->getElementIndex( row, column ), value, thisElementMultiplicator ); + const Index i = this->getElementIndex( row, column ); + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); } - template< typename Real, typename Device, typename Index > @@ -231,7 +311,7 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::setRowFast( const IndexType ro cerr << " elements = " << elements << " this->columns = " << this->columns << " this->getName() = " << this->getName() ); - return this->addRow( row, columns, values, elements, 0.0 ); + return this->addRowFast( row, columns, values, elements, 0.0 ); } template< typename Real, @@ -246,10 +326,9 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::setRow( const IndexType row, cerr << " elements = " << elements << " this->columns = " << this->columns << " this->getName() = " << this->getName() ); - //return this->addRow( row, columns, values, elements, 0.0 ); + return this->addRow( row, columns, values, elements, 0.0 ); } - template< typename Real, typename Device, typename Index > @@ -273,7 +352,7 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::addRowFast( const IndexType ro const IndexType& column = columns[ i ]; if( column < row - 1 || column > row + 1 ) return false; - addElement( row, column, values[ i ], thisRowMultiplicator ); + addElementFast( row, column, values[ i ], thisRowMultiplicator ); } return true; } @@ -291,16 +370,16 @@ bool tnlTridiagonalMatrix< Real, Device, Index >::addRow( const IndexType row, cerr << " elements = " << elements << " this->columns = " << this->columns << " this->getName() = " << this->getName() ); - /*if( elements > 3 ) + if( elements > 3 ) return false; for( IndexType i = 0; i < elements; i++ ) { - const IndexType& column = columns[ i ]; + const IndexType column = columns[ i ]; if( column < row - 1 || column > row + 1 ) return false; addElement( row, column, values[ i ], thisRowMultiplicator ); } - return true;*/ + return true; } template< typename Real, @@ -314,7 +393,7 @@ Real tnlTridiagonalMatrix< Real, Device, Index >::getElementFast( const IndexTyp { if( abs( column - row ) > 1 ) return 0.0; - return this->values.getElement( this->getElementIndex( row, column ) ); + return this->values[ this->getElementIndex( row, column ) ]; } template< typename Real, @@ -323,10 +402,11 @@ template< typename Real, Real tnlTridiagonalMatrix< Real, Device, Index >::getElement( const IndexType row, const IndexType column ) const { - return this->getElementFast( row, column ); + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); } - template< typename Real, typename Device, typename Index > @@ -357,56 +437,50 @@ void tnlTridiagonalMatrix< Real, Device, Index >::getRow( const IndexType row, IndexType* columns, RealType* values ) const { - /*IndexType elementPointer( 0 ); + IndexType elementPointer( 0 ); for( IndexType i = -1; i <= 1; i++ ) { const IndexType column = row + 1; if( column >= 0 && column < this->getColumns() ) { columns[ elementPointer ] = column; - values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; + values[ elementPointer ] = this->values.getElement( this->getElementIndex( row, column ) ); elementPointer++; } - }*/ -} - - -template< typename Real, - typename Device, - typename Index > -Real& tnlTridiagonalMatrix< Real, Device, Index >::operator()( const IndexType row, - const IndexType column ) -{ - return this->values[ this->getElementIndex( row, column ) ]; + } } template< typename Real, typename Device, typename Index > -const Real& tnlTridiagonalMatrix< Real, Device, Index >::operator()( const IndexType row, - const IndexType column ) const +template< typename Vector > +#ifdef HAVE_CUDA + __device__ __host__ +#endif +typename Vector::RealType tnlTridiagonalMatrix< Real, Device, Index >::rowVectorProduct( const IndexType row, + const Vector& vector ) const { - return this->values[ this->getElementIndex( row, column ) ]; + return tnlTridiagonalMatrixDeviceDependentCode< Device >:: + rowVectorProduct( this->rows, + this->values, + row, + vector ); } +#ifdef HAVE_CUDA template< typename Real, - typename Device, - typename Index > -template< typename Vector > -typename Vector::RealType tnlTridiagonalMatrix< Real, Device, Index >::rowVectorProduct( const IndexType row, - const Vector& vector ) const + typename Index, + typename Vector > +__global__ void tnlTridiagonalMatrixVectorProductCudaKernel( tnlTridiagonalMatrix< Real, tnlCuda, Index >* matrix, + const Vector* inVector, + Vector* outVector, + const Index gridIdx ) { - if( row == 0 ) - return vector[ 0 ] * this->values[ 0 ] + - vector[ 1 ] * this->values[ 1 ]; - IndexType i = 3 * row - 1; - if( row == this->rows - 1 ) - return vector[ row - 1 ] * this->values[ i++ ] + - vector[ row ] * this->values[ i ]; - return vector[ row - 1 ] * this->values[ i++ ] + - vector[ row ] * this->values[ i++ ] + - vector[ row + 1 ] * this->values[ i ]; + const Index rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < matrix->getRows() ) + ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); } +#endif template< typename Real, typename Device, @@ -426,8 +500,32 @@ void tnlTridiagonalMatrix< Real, Device, Index >::vectorProduct( const Vector& i << "Vector size: " << outVector.getSize() << endl << "Vector name: " << outVector.getName() << endl ); - for( IndexType row = 0; row < this->getRows(); row++ ) - outVector[ row ] = this->rowVectorProduct( row, inVector ); + if( Device::getDevice() == tnlHostDevice ) + for( IndexType row = 0; row < this->getRows(); row++ ) + outVector[ row ] = this->rowVectorProduct( row, inVector ); + + if( Device::getDevice() == tnlCudaDevice ) + { +#ifdef HAVE_CUDA + ThisType* kernel_this = tnlCuda::passToDevice( *this ); + Vector* kernel_inVector = tnlCuda::passToDevice( inVector ); + Vector* kernel_outVector = tnlCuda::passToDevice( outVector ); + dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( this->getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize(); + tnlTridiagonalMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_this, kernel_inVector, kernel_outVector, gridIdx ); + } + tnlCuda::freeFromDevice( kernel_this ); + tnlCuda::freeFromDevice( kernel_inVector ); + tnlCuda::freeFromDevice( kernel_outVector ); + checkCudaDevice; +#endif + } } template< typename Real, @@ -443,38 +541,84 @@ void tnlTridiagonalMatrix< Real, Device, Index >::addMatrix( const tnlTridiagona << "This matrix rows: " << this->getRows() << endl << "This matrix name: " << this->getName() << endl ); - const IndexType lastI = this->values.getSize(); if( thisMatrixMultiplicator == 1.0 ) - { - for( IndexType i = 0; i < lastI; i++ ) - this->values[ i ] += matrixMultiplicator*matrix.values[ i ]; - } + this->values.alphaXPlusY( matrixMultiplicator, matrix.values ); else + this->values.alphaXPlusBetaY( matrixMultiplicator, matrix.values, thisMatrixMultiplicator ); +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void tnlTridiagonalMatrixTranspositionCudaKernel( const tnlTridiagonalMatrix< Real2, tnlCuda, Index2 >* inMatrix, + tnlTridiagonalMatrix< Real, tnlCuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) { - for( IndexType i = 0; i < lastI; i++ ) - this->values[ i ] = thisMatrixMultiplicator * this->values[ i ] + - matrixMultiplicator*matrix.values[ i ]; + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); } } +#endif template< typename Real, typename Device, typename Index > - template< typename Real2, typename Index2, int tileDim > + template< typename Real2, typename Index2 > void tnlTridiagonalMatrix< Real, Device, Index >::getTransposition( const tnlTridiagonalMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { tnlAssert( this->getRows() == matrix.getRows(), cerr << "This matrix rows: " << this->getRows() << endl << "That matrix rows: " << matrix.getRows() << endl ); - - const IndexType& rows = matrix.getRows(); - for( IndexType i = 1; i < rows; i++ ) + if( Device::getDevice() == tnlHostDevice ) { - RealType aux = matrix. getElement( i, i - 1 ); - this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); - this->setElement( i, i, matrix.getElement( i, i ) ); - this->setElement( i - 1, i, aux ); + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( Device::getDevice() == tnlCudaDevice ) + { +#ifdef HAVE_CUDA + ThisType* kernel_this = tnlCuda::passToDevice( *this ); + typedef tnlTridiagonalMatrix< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = tnlCuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize(); + tnlTridiagonalMatrixTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + tnlCuda::freeFromDevice( kernel_this ); + tnlCuda::freeFromDevice( kernel_inMatrix ); + checkCudaDevice; +#endif } } @@ -547,7 +691,7 @@ void tnlTridiagonalMatrix< Real, Device, Index >::print( ostream& str ) const str <<"Row: " << row << " -> "; for( IndexType column = row - 1; column < row + 2; column++ ) if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->operator()( row, column ) << "\t"; + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; str << endl; } } @@ -555,15 +699,21 @@ void tnlTridiagonalMatrix< Real, Device, Index >::print( ostream& str ) const template< typename Real, typename Device, typename Index > +#ifdef HAVE_CUDA + __device__ __host__ +#endif Index tnlTridiagonalMatrix< Real, Device, Index >::getElementIndex( const IndexType row, const IndexType column ) const { + // TODO: remove the #ifndef when CUDA supports std:cerr +#ifndef HAVE_CUDA tnlAssert( row >= 0 && column >= 0 && row < this->rows && column < this->rows, cerr << " this->rows = " << this->rows << " row = " << row << " column = " << column ); tnlAssert( abs( row - column ) < 2, cerr << "row = " << row << " column = " << column << endl ); - return 3*row + column - row; +#endif + return tnlTridiagonalMatrixDeviceDependentCode< Device >::getElementIndex( this->rows, row, column ); } diff --git a/src/matrices/tnlTridiagonalMatrix.h b/src/matrices/tnlTridiagonalMatrix.h index 5ca97720da..cba9cd70b6 100644 --- a/src/matrices/tnlTridiagonalMatrix.h +++ b/src/matrices/tnlTridiagonalMatrix.h @@ -32,6 +32,7 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename tnlMatrix< Real, Device, Index >::RowLengthsVector RowLengthsVector; + typedef tnlTridiagonalMatrix< Real, Device, Index > ThisType; tnlTridiagonalMatrix(); @@ -82,6 +83,11 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + #ifdef HAVE_CUDA __device__ __host__ #endif @@ -90,6 +96,11 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > const RealType* values, const IndexType elements ); + bool setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements ); + #ifdef HAVE_CUDA __device__ __host__ #endif @@ -99,6 +110,12 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > const IndexType elements, const RealType& thisRowMultiplicator = 1.0 ); + bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType elements, + const RealType& thisRowMultiplicator = 1.0 ); + #ifdef HAVE_CUDA __device__ __host__ #endif @@ -115,13 +132,14 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > IndexType* columns, RealType* values ) const; - RealType& operator()( const IndexType row, - const IndexType column ); - - const RealType& operator()( const IndexType row, - const IndexType column ) const; + void getRow( const IndexType row, + IndexType* columns, + RealType* values ) const; template< typename Vector > +#ifdef HAVE_CUDA + __device__ __host__ +#endif typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; @@ -135,11 +153,11 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > const RealType& thisMatrixMultiplicator = 1.0 ); #ifdef HAVE_NOT_CXX11 - template< typename Real2, typename Index2, int tileDim > + template< typename Real2, typename Index2 > void getTransposition( const tnlTridiagonalMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator = 1.0 ); #else - template< typename Real2, typename Index2, int tileDim = 32 > + template< typename Real2, typename Index2 > void getTransposition( const tnlTridiagonalMatrix< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator = 1.0 ); #endif @@ -162,6 +180,9 @@ class tnlTridiagonalMatrix : public tnlMatrix< Real, Device, Index > protected: +#ifdef HAVE_CUDA + __device__ __host__ +#endif IndexType getElementIndex( const IndexType row, const IndexType column ) const; diff --git a/tests/unit-tests/CMakeLists.txt b/tests/unit-tests/CMakeLists.txt index b13a994863..5741c78742 100755 --- a/tests/unit-tests/CMakeLists.txt +++ b/tests/unit-tests/CMakeLists.txt @@ -66,7 +66,11 @@ if( BUILD_CUDA ) SET_TESTS_PROPERTIES ( core/vectors/tnlVectorTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnlVectorOperationsTest-cuda${mpiExt}${debugExt} ) ADD_TEST( matrices/tnlDenseMatrixTest-cuda${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlDenseMatrixTest-cuda${mpiExt}${debugExt} ) - SET_TESTS_PROPERTIES ( matrices/tnlDenseMatrixTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnlVectorTest-cuda${mpiExt}${debugExt} ) + SET_TESTS_PROPERTIES ( matrices/tnlDenseMatrixTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnlVectorTest-cuda${mpiExt}${debugExt} ) + + ADD_TEST( matrices/tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ${EXECUTABLE_OUTPUT_PATH}/tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ) + SET_TESTS_PROPERTIES ( matrices/tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} PROPERTIES DEPENDS core/cuda/tnlDenseMatrixTest-cuda${mpiExt}${debugExt} ) + endif() diff --git a/tests/unit-tests/matrices/CMakeLists.txt b/tests/unit-tests/matrices/CMakeLists.txt index 4d9da43c63..466a1c99bd 100755 --- a/tests/unit-tests/matrices/CMakeLists.txt +++ b/tests/unit-tests/matrices/CMakeLists.txt @@ -16,16 +16,16 @@ if( BUILD_CUDA ) tnl${mpiExt}${debugExt}-0.1 ) endif() -#ADD_EXECUTABLE( tnlTridiagonalMatrixTest${mpiExt}${debugExt} ${headers} tnlTridiagonalMatrixTest.cpp ) -#TARGET_LINK_LIBRARIES( tnlTridiagonalMatrixTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} -# tnl${mpiExt}${debugExt}-0.1 ) -# -#if( BUILD_CUDA ) -# CUDA_ADD_EXECUTABLE( tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ${headers} tnlTridiagonalMatrixTest.cu -# OPTIONS -arch sm_20 ) -# TARGET_LINK_LIBRARIES( tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} -# tnl${mpiExt}${debugExt}-0.1 ) -#endif() +ADD_EXECUTABLE( tnlTridiagonalMatrixTest${mpiExt}${debugExt} ${headers} tnlTridiagonalMatrixTest.cpp ) +TARGET_LINK_LIBRARIES( tnlTridiagonalMatrixTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} + tnl${mpiExt}${debugExt}-0.1 ) + +if( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ${headers} tnlTridiagonalMatrixTest.cu + OPTIONS -arch sm_20 ) + TARGET_LINK_LIBRARIES( tnlTridiagonalMatrixTest-cuda${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} + tnl${mpiExt}${debugExt}-0.1 ) +endif() # #ADD_EXECUTABLE( tnlMultidiagonalMatrixTest${mpiExt}${debugExt} ${headers} tnlMultidiagonalMatrixTest.cpp ) #TARGET_LINK_LIBRARIES( tnlMultidiagonalMatrixTest${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} diff --git a/tests/unit-tests/matrices/tnlTridiagonalMatrixTest.cu b/tests/unit-tests/matrices/tnlTridiagonalMatrixTest.cu index d8ddc787ae..119ffe0d59 100644 --- a/tests/unit-tests/matrices/tnlTridiagonalMatrixTest.cu +++ b/tests/unit-tests/matrices/tnlTridiagonalMatrixTest.cu @@ -25,10 +25,10 @@ int main( int argc, char* argv[] ) { #ifdef HAVE_CPPUNIT - if( ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< float, tnlHost, int > >() || - ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< double, tnlHost, int > >() || - ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< float, tnlHost, long int > >() || - ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< double, tnlHost, long int > >() + if( ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< float, tnlCuda, int > >() || + ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< double, tnlCuda, int > >() || + ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< float, tnlCuda, long int > >() || + ! tnlUnitTestStarter :: run< tnlTridiagonalMatrixTester< double, tnlCuda, long int > >() ) return EXIT_FAILURE; return EXIT_SUCCESS; diff --git a/tests/unit-tests/matrices/tnlTridiagonalMatrixTester.h b/tests/unit-tests/matrices/tnlTridiagonalMatrixTester.h index 98d55a2ae2..3b9a5cfc0a 100644 --- a/tests/unit-tests/matrices/tnlTridiagonalMatrixTester.h +++ b/tests/unit-tests/matrices/tnlTridiagonalMatrixTester.h @@ -99,9 +99,9 @@ class tnlTridiagonalMatrixTester : public CppUnit :: TestCase CPPUNIT_ASSERT( m.getElement( i, i ) == i ); for( int i = 0; i < 10; i++ ) - m( i, i ) = i; + m.setElement( i, i, i ); for( int i = 0; i < 10; i++ ) - CPPUNIT_ASSERT( m( i, i ) == i ); + CPPUNIT_ASSERT( m.getElement( i, i ) == i ); } void addElementTest() @@ -134,14 +134,14 @@ class tnlTridiagonalMatrixTester : public CppUnit :: TestCase for( int i = 0; i < 10; i++ ) m.setElement( i, i, i ); - tnlVector< IndexType, Device, IndexType > columns; - VectorType values; + tnlVector< IndexType, tnlHost, IndexType > columns; + tnlVector< RealType, tnlHost, IndexType > values; columns.setSize( 3 ); values.setSize( 3 ); for( IndexType i = 4; i <= 6; i++ ) { - columns[ i - 4 ] = i; - values[ i - 4 ] = i; + columns.setElement( i - 4, i ); + values.setElement( i - 4, i ); } m.setRow( 5, columns.getData(), values.getData(), 3 ); @@ -172,9 +172,8 @@ class tnlTridiagonalMatrixTester : public CppUnit :: TestCase m.setElement( i, i, i ); } m.vectorProduct( v, w ); - for( int i = 0; i < size; i++ ) - CPPUNIT_ASSERT( w[ i ] == i*i ); + CPPUNIT_ASSERT( w.getElement( i ) == i*i ); } void addMatrixTest() @@ -185,7 +184,7 @@ class tnlTridiagonalMatrixTester : public CppUnit :: TestCase for( int i = 0; i < size; i++ ) for( int j = 0; j < size; j++ ) if( abs( i - j ) <= 1 ) - m( i, j ) = i*size + j; + m.setElement( i, j, i*size + j ); MatrixType m2; m2.setLike( m ); @@ -222,8 +221,7 @@ class tnlTridiagonalMatrixTester : public CppUnit :: TestCase MatrixType mTransposed; mTransposed.setLike( m ); mTransposed.template getTransposition< typename MatrixType::RealType, - typename MatrixType::IndexType, - 32 >( m ); + typename MatrixType::IndexType >( m ); for( int i = 0; i < size; i++ ) for( int j = 0; j < size; j++ ) -- GitLab