Loading src/matrix/tnlAdaptiveRgCSRMatrix.h +75 −16 Original line number Diff line number Diff line Loading @@ -77,8 +77,7 @@ class tnlAdaptiveRgCSRMatrix : public tnlMatrix< Real, Device, Index > Index getArtificialZeroElements() const; Real getElement( Index row, Index column ) const { abort(); }; Real getElement( Index row, Index column ) const; bool setElement( Index row, Index colum, Loading Loading @@ -138,9 +137,10 @@ class tnlAdaptiveRgCSRMatrix : public tnlMatrix< Real, Device, Index > tnlLongVector< Index, tnlHost, Index > usedThreadsInGroup; tnlLongVector< Index, Device, Index > rowToGroupMapping; Index maxGroupSize, groupSizeStep; Index targetNonzeroesPerGroup; Index numberOfGroups; Loading Loading @@ -181,6 +181,7 @@ tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: tnlAdaptiveRgCSRMatrix( const t threads( name + " : threads" ), groupInfo( name + ": groupInfo" ), usedThreadsInGroup( name + " : usedThreadsInGroup" ), rowToGroupMapping( name + " : rowToGroupMapping" ), maxGroupSize( _maxGroupSize ), groupSizeStep(_groupSizeStep), targetNonzeroesPerGroup(_targetNonzeroesPerGroup), Loading Loading @@ -228,10 +229,12 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: setSize( Index new_size ) this -> size = new_size; if( ! groupInfo. setSize( this -> getSize()) || ! usedThreadsInGroup. setSize( this -> getSize() ) || ! threads. setSize( this -> getSize() ) ) ! threads. setSize( this -> getSize() ) || ! rowToGroupMapping. setSize( this -> getSize() ) ) return false; threads. setValue( 0 ); usedThreadsInGroup. setValue( 0 ); rowToGroupMapping. setValue( 0 ); last_nonzero_element = 0; return true; }; Loading Loading @@ -338,7 +341,7 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr threads[ i ] = threads[ i - 1 ] + threadsPerRow[ i - groupBegin - 1 ]; dbgExpr( threads[ i ] ); } usedThreadsInGroup[ groupId ] = threads[ groupEnd - groupBegin - 1 ]; // ???????/ usedThreadsInGroup[ groupId ] = threads[ groupEnd - groupBegin - 1 ]; // ??????? dbgExpr( usedThreadsInGroup[ groupId ] ); /**** Loading Loading @@ -368,6 +371,9 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr dbgExpr( groupInfo[ groupId ]. idxFirstValue ); dbgExpr( groupInfo[ groupId ]. numRounds ); for( Index i = groupBegin; i < groupEnd; i ++ ) rowToGroupMapping[ i ] = groupId; groupId++; numberOfStoredValues += cudaBlockSize * roundsFinal; groupBegin = groupEnd; Loading Loading @@ -399,6 +405,8 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr { baseRow = groupInfo[ i ]. idxFirstRow; index = groupInfo[ i ]. idxFirstValue; dbgExpr( baseRow ); dbgExpr( index ); /**** * First compute number of threads for each row. */ Loading @@ -422,11 +430,13 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr if( counters[ j ] < NZperRow[ j ] ) { Index pos = mat. row_offsets[ baseRow + j ] + counters[ j ]; dbgCout( "Inserting data from CSR format at position " << pos << " to AdaptiveRgCSR at " << index ); nonzeroElements[ index ] = mat. nonzero_elements[ pos ]; columns[ index ] = mat.columns[ pos ]; } else { dbgCout( "Inserting artificial zero to AdaptiveRgCSR at " << index ); columns[ index ] = -1; nonzeroElements[ index ] = 0.0; } Loading Loading @@ -479,6 +489,42 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlAdaptiv return true; }; template< typename Real, tnlDevice Device, typename Index > Real tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: getElement( Index row, Index column ) const { tnlAssert( 0 <= row && row < this -> getSize(), cerr << "The row is outside the matrix." ); if( Device == tnlHost ) { Index groupId = rowToGroupMapping[ row ]; Index groupRow = row - groupInfo[ groupId ]. idxFirstRow; Index groupOffset = groupInfo[ groupId ]. idxFirstValue; Index firstThread = threads[ row ]; Index lastThread = threads[ row + 1 ]; /**** * If it is the last row in a group the lastThread is taken from usedThraeds */ if( row + 1 - groupInfo[ groupId ]. idxFirstRow == groupInfo[ groupId ]. numRows ) lastThread = usedThreadsInGroup[ groupId ]; /*for( Index i = firstThread * groupInfo[ groupId ]. numRounds; i < lastThread * groupInfo[ groupId ] * numRounds; i ++ ) if( columns[ i ] == column ) return 0; //???????????????????? */ } if( Device == tnlCuda ) { tnlAssert( false, cerr << "tnlRgCSRMatrix< Real, tnlCuda, Index > ::getElement is not implemented yet." ); //TODO: implement this } } template< typename Real, tnlDevice Device, typename Index > void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector< Real, Device, Index >& vec, tnlLongVector< Real, Device, Index >& result ) const Loading @@ -503,11 +549,17 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo Index limits[MAX_ROWS + 1]; //indices of first threads for each row + index of first unused thread Real results[MAX_ROWS]; for( Index group = 0; group < this -> numberOfGroups; group ++ ) //for each group of rows /**** * Go over all groups ... */ for( Index group = 0; group < this -> numberOfGroups; group ++ ) { /**** * In each group compute partial sums of each thread */ for( Index thread = 0; thread < this -> usedThreadsInGroup[ group ]; thread ++ ) //for each used thread in group thread ++ ) { idx[ thread ] = this -> groupInfo[ group ]. idxFirstValue + thread; psum[thread] = 0; Loading @@ -521,11 +573,18 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo } } /**** * Compute local copy of thread indexes mapped to given row of the group. * (this is only to simulate copying data to the fast shared memory on GPU) */ for( Index thread = 0; thread < this -> groupInfo[ group ]. numRows; thread ++ ) //for threads corresponding to rows in group limits[ thread ] = this -> threads[ this -> groupInfo[ group ]. idxFirstRow + thread ]; //make a local copy of info about threads limits[ this -> groupInfo[ group ]. numRows ] = this -> usedThreadsInGroup[ group ]; //for convenience, add the index of first unused row thread ++ ) limits[ thread ] = this -> threads[ this -> groupInfo[ group ]. idxFirstRow + thread ]; /**** * For convenience, add the index of first unused row. */ limits[ this -> groupInfo[ group ]. numRows ] = this -> usedThreadsInGroup[ group ]; //reduction of partial sums and writing to the output for( Index thread = 0; Loading Loading @@ -555,13 +614,13 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo int gridSize = (int) desGridSize; dim3 gridDim( gridSize ), blockDim( blockSize ); AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), /*AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), vec. getVector(), nonzero_elements. getVector(), nonzeroElements. getVector(), columns. getVector(), block_info. getVector(), threads_per_row. getVector(), number_of_groups ); groupInfo. getVector(), threadsPerRow. getVector(), numberOfGroups );*/ cudaThreadSynchronize(); CHECK_CUDA_ERROR; #else Loading src/matrix/tnlRgCSRMatrix.h +5 −3 Original line number Diff line number Diff line Loading @@ -456,6 +456,7 @@ template< typename Real, tnlDevice Device, typename Index > bool tnlRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlRgCSRMatrix< Real, Device2, Index >& rgCSRMatrix ) { dbgFunctionName( "tnlRgCSRMatrix< Real, Device, Index >", "copyFrom" ); groupSize = rgCSRMatrix. getGroupSize(); if( ! this -> setSize( rgCSRMatrix. getSize() ) ) return false; Loading Loading @@ -635,7 +636,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector int gridSize = size / blockSize + ( size % blockSize != 0 ) + 1; dim3 gridDim( gridSize ), blockDim( blockSize ); if( useCache ) sparseOldCSRMatrixVectorProductKernel< Real, Index, true ><<< gridDim, blockDim >>>( size, sparseCSRMatrixVectorProductKernel< Real, Index, true ><<< gridDim, blockDim >>>( size, this -> getGroupSize(), nonzeroElements. getVector(), columns. getVector(), Loading @@ -644,7 +645,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector vec. getVector(), result. getVector() ); else sparseOldCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( size, sparseCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( size, this -> getGroupSize(), nonzeroElements. getVector(), columns. getVector(), Loading @@ -653,6 +654,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector vec. getVector(), result. getVector() ); cudaThreadSynchronize(); if( useCache ) unbindRgCSRMatrixCUDATexture( vec. getVector() ); CHECK_CUDA_ERROR; #else Loading tests/Makefile.am +11 −11 Original line number Diff line number Diff line Loading @@ -18,15 +18,15 @@ matrix_solvers_benchmark_sources = matrix-solvers-benchmark.h tnl_benchmarks_sources = tnl-benchmarks.h #if BUILD_CUDA #matrix_formats_test_sources += matrix-formats-test-cuda.cu #sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu #matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu #tnl_benchmarks_sources += tnl-benchmarks-cuda.cu matrix_formats_test_sources += matrix-formats-test-cuda.cu sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu tnl_benchmarks_sources += tnl-benchmarks-cuda.cu #else matrix_formats_test_sources += matrix-formats-test.cpp sparse_matrix_benchmark_sources += sparse-matrix-benchmark.cpp matrix_solvers_benchmark_sources += matrix-solvers-benchmark.cpp tnl_benchmarks_sources += tnl-benchmarks.cpp #matrix_formats_test_sources += matrix-formats-test.cpp #sparse_matrix_benchmark_sources += sparse-matrix-benchmark.cpp #matrix_solvers_benchmark_sources += matrix-solvers-benchmark.cpp #tnl_benchmarks_sources += tnl-benchmarks.cpp #endif matrix_formats_test_SOURCES = $(matrix_formats_test_sources) Loading Loading @@ -77,9 +77,9 @@ matrix_solvers_benchmark_dbg_LDADD = ../src/libtnl-0.1.la \ endif #if BUILD_CUDA #matrix_formats_test_CXXFLAGS = -DHAVE_CUDA #sparse_matrix_benchmark_CXXFLAGS += -DHAVE_CUDA #tnl_benchmarks_CXXFLAGS = -DHAVE_CUDA matrix_formats_test_CXXFLAGS = -DHAVE_CUDA sparse_matrix_benchmark_CXXFLAGS += -DHAVE_CUDA tnl_benchmarks_CXXFLAGS = -DHAVE_CUDA #endif Loading tests/Makefile.in +142 −86 File changed.Preview size limit exceeded, changes collapsed. Show changes tests/convert-matrices +0 −28 Original line number Diff line number Diff line Loading @@ -120,34 +120,6 @@ do echo "Matrix $file.double.bin.bz2 was already converted." fi ####### ## Descend ordering ### if test ! -e $file.descend.float.bin.bz2; then echo "Converting matrix $file.descend.float.bin.bz2 ..." if test ! -e $file.descend; then ./reorder-csr -D -g 32 -i $file -o $file.descend fi $TNL_MATRIX_CONVERT --input-file $file.descend --output-file $file.descend.float.bin.bz2 --precision float --verbose yes --verify no #rm $file.descend else echo "Matrix $file.descend.float.bin.bz2 was already converted." fi if test ! -e $file.descend.double.bin.bz2; then echo "Converting matrix $file.descend.double.bin.bz2 ..." if test ! -e $file.descend; then ./reorder-csr -D -g 32 -i $file -o $file.descend fi $TNL_MATRIX_CONVERT --input-file $file.descend --output-file $file.descend.double.bin.bz2 --precision double --verbose yes --verify no #rm $file.descend else echo "Matrix $file.descend.double.bin.bz2 was already converted." fi ####### ## AMD ordering ### Loading Loading
src/matrix/tnlAdaptiveRgCSRMatrix.h +75 −16 Original line number Diff line number Diff line Loading @@ -77,8 +77,7 @@ class tnlAdaptiveRgCSRMatrix : public tnlMatrix< Real, Device, Index > Index getArtificialZeroElements() const; Real getElement( Index row, Index column ) const { abort(); }; Real getElement( Index row, Index column ) const; bool setElement( Index row, Index colum, Loading Loading @@ -138,9 +137,10 @@ class tnlAdaptiveRgCSRMatrix : public tnlMatrix< Real, Device, Index > tnlLongVector< Index, tnlHost, Index > usedThreadsInGroup; tnlLongVector< Index, Device, Index > rowToGroupMapping; Index maxGroupSize, groupSizeStep; Index targetNonzeroesPerGroup; Index numberOfGroups; Loading Loading @@ -181,6 +181,7 @@ tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: tnlAdaptiveRgCSRMatrix( const t threads( name + " : threads" ), groupInfo( name + ": groupInfo" ), usedThreadsInGroup( name + " : usedThreadsInGroup" ), rowToGroupMapping( name + " : rowToGroupMapping" ), maxGroupSize( _maxGroupSize ), groupSizeStep(_groupSizeStep), targetNonzeroesPerGroup(_targetNonzeroesPerGroup), Loading Loading @@ -228,10 +229,12 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: setSize( Index new_size ) this -> size = new_size; if( ! groupInfo. setSize( this -> getSize()) || ! usedThreadsInGroup. setSize( this -> getSize() ) || ! threads. setSize( this -> getSize() ) ) ! threads. setSize( this -> getSize() ) || ! rowToGroupMapping. setSize( this -> getSize() ) ) return false; threads. setValue( 0 ); usedThreadsInGroup. setValue( 0 ); rowToGroupMapping. setValue( 0 ); last_nonzero_element = 0; return true; }; Loading Loading @@ -338,7 +341,7 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr threads[ i ] = threads[ i - 1 ] + threadsPerRow[ i - groupBegin - 1 ]; dbgExpr( threads[ i ] ); } usedThreadsInGroup[ groupId ] = threads[ groupEnd - groupBegin - 1 ]; // ???????/ usedThreadsInGroup[ groupId ] = threads[ groupEnd - groupBegin - 1 ]; // ??????? dbgExpr( usedThreadsInGroup[ groupId ] ); /**** Loading Loading @@ -368,6 +371,9 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr dbgExpr( groupInfo[ groupId ]. idxFirstValue ); dbgExpr( groupInfo[ groupId ]. numRounds ); for( Index i = groupBegin; i < groupEnd; i ++ ) rowToGroupMapping[ i ] = groupId; groupId++; numberOfStoredValues += cudaBlockSize * roundsFinal; groupBegin = groupEnd; Loading Loading @@ -399,6 +405,8 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr { baseRow = groupInfo[ i ]. idxFirstRow; index = groupInfo[ i ]. idxFirstValue; dbgExpr( baseRow ); dbgExpr( index ); /**** * First compute number of threads for each row. */ Loading @@ -422,11 +430,13 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr if( counters[ j ] < NZperRow[ j ] ) { Index pos = mat. row_offsets[ baseRow + j ] + counters[ j ]; dbgCout( "Inserting data from CSR format at position " << pos << " to AdaptiveRgCSR at " << index ); nonzeroElements[ index ] = mat. nonzero_elements[ pos ]; columns[ index ] = mat.columns[ pos ]; } else { dbgCout( "Inserting artificial zero to AdaptiveRgCSR at " << index ); columns[ index ] = -1; nonzeroElements[ index ] = 0.0; } Loading Loading @@ -479,6 +489,42 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlAdaptiv return true; }; template< typename Real, tnlDevice Device, typename Index > Real tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: getElement( Index row, Index column ) const { tnlAssert( 0 <= row && row < this -> getSize(), cerr << "The row is outside the matrix." ); if( Device == tnlHost ) { Index groupId = rowToGroupMapping[ row ]; Index groupRow = row - groupInfo[ groupId ]. idxFirstRow; Index groupOffset = groupInfo[ groupId ]. idxFirstValue; Index firstThread = threads[ row ]; Index lastThread = threads[ row + 1 ]; /**** * If it is the last row in a group the lastThread is taken from usedThraeds */ if( row + 1 - groupInfo[ groupId ]. idxFirstRow == groupInfo[ groupId ]. numRows ) lastThread = usedThreadsInGroup[ groupId ]; /*for( Index i = firstThread * groupInfo[ groupId ]. numRounds; i < lastThread * groupInfo[ groupId ] * numRounds; i ++ ) if( columns[ i ] == column ) return 0; //???????????????????? */ } if( Device == tnlCuda ) { tnlAssert( false, cerr << "tnlRgCSRMatrix< Real, tnlCuda, Index > ::getElement is not implemented yet." ); //TODO: implement this } } template< typename Real, tnlDevice Device, typename Index > void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector< Real, Device, Index >& vec, tnlLongVector< Real, Device, Index >& result ) const Loading @@ -503,11 +549,17 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo Index limits[MAX_ROWS + 1]; //indices of first threads for each row + index of first unused thread Real results[MAX_ROWS]; for( Index group = 0; group < this -> numberOfGroups; group ++ ) //for each group of rows /**** * Go over all groups ... */ for( Index group = 0; group < this -> numberOfGroups; group ++ ) { /**** * In each group compute partial sums of each thread */ for( Index thread = 0; thread < this -> usedThreadsInGroup[ group ]; thread ++ ) //for each used thread in group thread ++ ) { idx[ thread ] = this -> groupInfo[ group ]. idxFirstValue + thread; psum[thread] = 0; Loading @@ -521,11 +573,18 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo } } /**** * Compute local copy of thread indexes mapped to given row of the group. * (this is only to simulate copying data to the fast shared memory on GPU) */ for( Index thread = 0; thread < this -> groupInfo[ group ]. numRows; thread ++ ) //for threads corresponding to rows in group limits[ thread ] = this -> threads[ this -> groupInfo[ group ]. idxFirstRow + thread ]; //make a local copy of info about threads limits[ this -> groupInfo[ group ]. numRows ] = this -> usedThreadsInGroup[ group ]; //for convenience, add the index of first unused row thread ++ ) limits[ thread ] = this -> threads[ this -> groupInfo[ group ]. idxFirstRow + thread ]; /**** * For convenience, add the index of first unused row. */ limits[ this -> groupInfo[ group ]. numRows ] = this -> usedThreadsInGroup[ group ]; //reduction of partial sums and writing to the output for( Index thread = 0; Loading Loading @@ -555,13 +614,13 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo int gridSize = (int) desGridSize; dim3 gridDim( gridSize ), blockDim( blockSize ); AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), /*AdaptiveRgCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( result. getVector(), vec. getVector(), nonzero_elements. getVector(), nonzeroElements. getVector(), columns. getVector(), block_info. getVector(), threads_per_row. getVector(), number_of_groups ); groupInfo. getVector(), threadsPerRow. getVector(), numberOfGroups );*/ cudaThreadSynchronize(); CHECK_CUDA_ERROR; #else Loading
src/matrix/tnlRgCSRMatrix.h +5 −3 Original line number Diff line number Diff line Loading @@ -456,6 +456,7 @@ template< typename Real, tnlDevice Device, typename Index > bool tnlRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlRgCSRMatrix< Real, Device2, Index >& rgCSRMatrix ) { dbgFunctionName( "tnlRgCSRMatrix< Real, Device, Index >", "copyFrom" ); groupSize = rgCSRMatrix. getGroupSize(); if( ! this -> setSize( rgCSRMatrix. getSize() ) ) return false; Loading Loading @@ -635,7 +636,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector int gridSize = size / blockSize + ( size % blockSize != 0 ) + 1; dim3 gridDim( gridSize ), blockDim( blockSize ); if( useCache ) sparseOldCSRMatrixVectorProductKernel< Real, Index, true ><<< gridDim, blockDim >>>( size, sparseCSRMatrixVectorProductKernel< Real, Index, true ><<< gridDim, blockDim >>>( size, this -> getGroupSize(), nonzeroElements. getVector(), columns. getVector(), Loading @@ -644,7 +645,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector vec. getVector(), result. getVector() ); else sparseOldCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( size, sparseCSRMatrixVectorProductKernel< Real, Index, false ><<< gridDim, blockDim >>>( size, this -> getGroupSize(), nonzeroElements. getVector(), columns. getVector(), Loading @@ -653,6 +654,7 @@ void tnlRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLongVector vec. getVector(), result. getVector() ); cudaThreadSynchronize(); if( useCache ) unbindRgCSRMatrixCUDATexture( vec. getVector() ); CHECK_CUDA_ERROR; #else Loading
tests/Makefile.am +11 −11 Original line number Diff line number Diff line Loading @@ -18,15 +18,15 @@ matrix_solvers_benchmark_sources = matrix-solvers-benchmark.h tnl_benchmarks_sources = tnl-benchmarks.h #if BUILD_CUDA #matrix_formats_test_sources += matrix-formats-test-cuda.cu #sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu #matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu #tnl_benchmarks_sources += tnl-benchmarks-cuda.cu matrix_formats_test_sources += matrix-formats-test-cuda.cu sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu tnl_benchmarks_sources += tnl-benchmarks-cuda.cu #else matrix_formats_test_sources += matrix-formats-test.cpp sparse_matrix_benchmark_sources += sparse-matrix-benchmark.cpp matrix_solvers_benchmark_sources += matrix-solvers-benchmark.cpp tnl_benchmarks_sources += tnl-benchmarks.cpp #matrix_formats_test_sources += matrix-formats-test.cpp #sparse_matrix_benchmark_sources += sparse-matrix-benchmark.cpp #matrix_solvers_benchmark_sources += matrix-solvers-benchmark.cpp #tnl_benchmarks_sources += tnl-benchmarks.cpp #endif matrix_formats_test_SOURCES = $(matrix_formats_test_sources) Loading Loading @@ -77,9 +77,9 @@ matrix_solvers_benchmark_dbg_LDADD = ../src/libtnl-0.1.la \ endif #if BUILD_CUDA #matrix_formats_test_CXXFLAGS = -DHAVE_CUDA #sparse_matrix_benchmark_CXXFLAGS += -DHAVE_CUDA #tnl_benchmarks_CXXFLAGS = -DHAVE_CUDA matrix_formats_test_CXXFLAGS = -DHAVE_CUDA sparse_matrix_benchmark_CXXFLAGS += -DHAVE_CUDA tnl_benchmarks_CXXFLAGS = -DHAVE_CUDA #endif Loading
tests/convert-matrices +0 −28 Original line number Diff line number Diff line Loading @@ -120,34 +120,6 @@ do echo "Matrix $file.double.bin.bz2 was already converted." fi ####### ## Descend ordering ### if test ! -e $file.descend.float.bin.bz2; then echo "Converting matrix $file.descend.float.bin.bz2 ..." if test ! -e $file.descend; then ./reorder-csr -D -g 32 -i $file -o $file.descend fi $TNL_MATRIX_CONVERT --input-file $file.descend --output-file $file.descend.float.bin.bz2 --precision float --verbose yes --verify no #rm $file.descend else echo "Matrix $file.descend.float.bin.bz2 was already converted." fi if test ! -e $file.descend.double.bin.bz2; then echo "Converting matrix $file.descend.double.bin.bz2 ..." if test ! -e $file.descend; then ./reorder-csr -D -g 32 -i $file -o $file.descend fi $TNL_MATRIX_CONVERT --input-file $file.descend --output-file $file.descend.double.bin.bz2 --precision double --verbose yes --verify no #rm $file.descend else echo "Matrix $file.descend.double.bin.bz2 was already converted." fi ####### ## AMD ordering ### Loading