diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 484ff2358932e5373cd21dccda07cf3dd580ae31..6a9dab96a4913e15a0f2e970a6480f88f5946404 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -59,8 +59,6 @@ std::string getMatrixFormat( const Matrix& matrix )
    return format;
 }
 
-// This function is not used currently (as of 17.03.19),
-// as the log takes care of printing and saving this information into the log file.
 // Print information about the matrix.
 template< typename Matrix >
 void printMatrixInfo( const Matrix& matrix,
@@ -218,14 +216,6 @@ benchmarkSpMV( Benchmark & benchmark,
    // Setup cuSPARSE MetaData, since it has the same header as CSR,
    // and therefore will not get its own headers (rows, cols, speedup etc.) in log.
    //   * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
-
-   // FIXME: How to include benchmark with different name under the same header as the current format being benchmarked???
-   // FIXME: Does it matter that speedup show difference only between current test and first test?
-   //        Speedup shows difference between CPU and GPU-cuSPARSE, because in Benchmarks.h:
-   //          * If there is no baseTime, the resulting test time is set to baseTime.
-   //          * However, if there is a baseTime (from the CPU compared to GPU test),
-   //            baseTime isn't changed. If we change it in Benchmarks.h to compare
-   //            the speedup from the last test, it will mess up BLAS benchmarks etc.
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
          { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
          { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
@@ -244,7 +234,6 @@ benchmarkSpMV( Benchmark & benchmark,
       resultcuSPARSEDeviceVector2 = deviceVector2;
 #endif
 
-//#ifdef COMPARE_RESULTS
    // Difference between GPU (curent format) and GPU-cuSPARSE results
    Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
    Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 );
@@ -274,15 +263,6 @@ benchmarkSpMV( Benchmark & benchmark,
       std::cout << GPUcuSparse_absMax << std::endl;
       std::cout << GPUcuSparse_lpNorm << std::endl;
 
-      // FIXME: This isn't an elegant solution, it makes the log file very long.
-//      benchmark.addErrorMessage( GPUcuSparse_absMax, 1 );
-//      benchmark.addErrorMessage( GPUcuSparse_lpNorm, 1 );
-
-//      benchmark.addErrorMessage( CPUxGPU_absMax, 1 );
-//      benchmark.addErrorMessage( CPUxGPU_lpNorm, 1 );
-
-//#endif
-
    std::cout << std::endl;
    return true;
 }
@@ -295,15 +275,14 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
                         bool verboseMR )
 {
    bool result = true;
-   // TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
-//   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
 
-   // AdEllpack/BiEllpack doesn't have cross-device assignment ('= operator') implemented yet
-   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
-//   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
+   // AdEllpack is broken
+//   result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
 
    return result;
 }
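A note on the consistency checks above: benchmarkSpMV() validates each tested format against cuSPARSE via Vector::differenceAbsMax() and Vector::differenceLpNorm( ..., 1 ). The following self-contained C++ sketch shows what those two metrics compute, using plain std::vector stand-ins rather than TNL vectors (hypothetical helper names, illustration only):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Maximum absolute difference between a tested result and a reference
// result, i.e. the max-norm of a - b.
double differenceAbsMax( const std::vector< double >& a, const std::vector< double >& b )
{
   double maxDiff = 0.0;
   for( std::size_t i = 0; i < a.size(); i++ )
      maxDiff = std::max( maxDiff, std::fabs( a[ i ] - b[ i ] ) );
   return maxDiff;
}

// L1 norm of the difference, i.e. differenceLpNorm with p = 1.
double differenceL1Norm( const std::vector< double >& a, const std::vector< double >& b )
{
   double sum = 0.0;
   for( std::size_t i = 0; i < a.size(); i++ )
      sum += std::fabs( a[ i ] - b[ i ] );
   return sum;
}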
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h
index 1fcbc1494d5cd133dcaad116b2c5efde33824515..34b081914604f949668d98a8ec5d7657c5ed4086 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/AdEllpack.h
@@ -81,13 +81,12 @@ public:
             std::cout << "HEAD==TAIL" << std::endl;
         else
         {
-            // TEST
             for( warpInfo< MatrixType >* i = this->getHead(); i != this->getTail()->next; i = i->next )
             {
-                if( i == this->getHead() );
-//                    std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
-                else if( i == this->getTail() );
-//                    std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
+                if( i == this->getHead() )
+                    std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
+                else if( i == this->getTail() )
+                    std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
                 else
                     std::cout << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
             }
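The printList() fix above addresses a classic pitfall: in the old code, the trailing semicolon in "if( i == this->getHead() );" was the (empty) body of the if, so the head and tail entries were silently skipped rather than printed. A minimal standalone illustration (not TNL code):

#include <iostream>

int main()
{
   int x = 1;

   // Bug: the stray semicolon is the whole body of the if, so the
   // indented statement below runs unconditionally.
   if( x == 0 );
      std::cout << "printed even though x != 0" << std::endl;

   // Fixed: the statement is now the body of the if and never runs here.
   if( x == 0 )
      std::cout << "not printed" << std::endl;
}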
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h
index dd8731fc66e4029acc192b392a73e7fb804e9acc..b01e9041e6a79d12f5ab3755e76340ea9dba135f 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/AdEllpack_impl.h
@@ -147,10 +147,6 @@ warpList< MatrixType >::~warpList()
       delete temp;
    }
    delete this->head;
-
-   // TEST
-//   std::cout << "List destructor." << std::endl;
-//   this->printList();
 }
 
 
@@ -1169,46 +1165,31 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector,
     IndexType i = 0;
     IndexType elementPtr = this->offset[ warpIdx ] + inWarpIdx;
 
-    // Save the value, to save calling access every loop.
     const IndexType warpLoad = this->localLoad[ warpIdx ];
 
-    // The unroll factor is 4, therefore if a warp has less than 4 localLoad, it cannot be unrolled
-    // and must be calculated separately.
     if( warpLoad < 4 )
     {
-        // While the helpful index of the warp localLoad is less than localLoad and the element index isn't
-        // out of the matrix (would return the number of columns of the matrix)
        while( i < warpLoad &&
               this->columnIndexes[ elementPtr ] < this->getColumns() )
        {
            temp[ threadIdx.x ] += inVector[ this->columnIndexes[ elementPtr ] ] * this->values[ elementPtr ];
-           // For the current thread, shift the elements ptr by warpSize (to keep the thread on one row)
            elementPtr += this->warpSize;
-           i++; // Increment the helpful localLoad index.
+           i++;
        }
     }
-    else // If the localLoad of the warp is unrollable.
+    else
     {
-        // Is the warpLoad divisible by 4 (4 - 1 for binary AND).
-        // This will return how far it is from being divisible:
-        // For 0 & 3 = 0; 1 & 3 = 1; 2 & 3 = 2; 3 & 3 = 3; 4 & 3 = 0, etc.
        IndexType alignUnroll = warpLoad & 3;
 
-        // While the result of divisibility by 4 has not reached the point where it is divisble by 4.
        while( alignUnroll != 0 &&
              this->columnIndexes[ elementPtr ] < this->getColumns() )
        {
            temp[ threadIdx.x ] += inVector[ this->columnIndexes[ elementPtr ] ] * this->values[ elementPtr ];
            elementPtr += this->warpSize;
            i++;
-           // If alignUnroll not 0 (i.e. no. of NNZ elements is not divisible by 4), decrement alignUnroll until it is.
-           // This will ensure that the i starting index with be incremented to the correct starting position for the unroll.
            alignUnroll--;
        }
     }
-
-    // For those rows that have warpLoad < unroll factor, this for loop won't even get past the first condition.
-    // Otherwise unroll.
     for( ; i < this->localLoad[ warpIdx ]; i += 4 )
     {
 #pragma unroll
@@ -1222,7 +1203,6 @@ void AdEllpack< Real, Device, Index >::spmvCuda8( const InVector& inVector,
        }
     }
 
-    // What is going on here? DOCUMENT
     if( ( inWarpIdx == 0 ) || ( reduceMap[ threadIdx.x ] > reduceMap[ threadIdx.x - 1 ] ) )
     {
         IndexType end = ( warpIdx + 1 ) << 5;
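The comments deleted from spmvCuda8() above explained the warpLoad & 3 idiom: n & 3 equals n % 4, so the kernel first peels off that many iterations and the remaining trip count becomes a multiple of the unroll factor 4. A host-side sketch of the same align-then-unroll pattern (illustration only, not the CUDA kernel):

#include <cstddef>

// Sum an array with a 4-way unrolled main loop. The first loop peels
// n % 4 iterations; the second loop then always runs a multiple of 4.
double sumUnrolled4( const double* v, std::size_t n )
{
   double sum = 0.0;
   std::size_t i = 0;

   for( std::size_t peel = n & 3; peel != 0; peel-- )  // n & 3 == n % 4
      sum += v[ i++ ];

   for( ; i < n; i += 4 )  // trip count is now divisible by 4
      sum += v[ i ] + v[ i + 1 ] + v[ i + 2 ] + v[ i + 3 ];

   return sum;
}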
@@ -1265,15 +1245,6 @@ void AdEllpack< Real, Device, Index >::spmvCuda16( const InVector& inVector,
     IndexType elementPtr = this->offset[ warpIdx ] + inWarpIdx;
     const IndexType warpLoad = this->localLoad[ warpIdx ];
 
-//    for( IndexType i = 0; i < warpLoad; i++ )
-//    {
-//        if( this->columnIndexes[ elementPtr ] < this->getColumns() )
-//        {
-//            temp[ threadIdx.x ] += this->values[ elementPtr] * inVector[ this->columnIndexes[ elementPtr ] ];
-//            elementPtr += this->warpSize;
-//        }
-//    }
-
     if( warpLoad < 8 )
     {
         while( i < warpLoad &&
@@ -1496,7 +1467,6 @@ public:
       InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector );
       OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector );
       TNL_CHECK_CUDA_DEVICE;
-      std::cout << "totalLoad = " << matrix.totalLoad << std::endl;
       if( matrix.totalLoad < 2 )
       {
         dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() );
@@ -1520,7 +1490,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 4 ) // WORKS
+      else if( matrix.totalLoad < 4 )
       {
         dim3 blockSize( 192 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1543,7 +1513,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 8 ) // Maybe works?
+      else if( matrix.totalLoad < 8 )
       {
         dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1566,7 +1536,7 @@ public:
         Devices::Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else if( matrix.totalLoad < 16 ) // BROKEN
+      else if( matrix.totalLoad < 16 )
       {
         dim3 blockSize( 128 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1589,7 +1559,7 @@ public:
         Cuda::freeFromDevice( kernel_outVector );
         TNL_CHECK_CUDA_DEVICE;
       }
-      else // BROKEN
+      else
       {
         dim3 blockSize( 96 ), cudaGridSize( Cuda::getMaxGridSize() );
         IndexType cudaBlocks = roundUpDivision( matrix.reduceMap.getSize(), blockSize.x );
@@ -1606,7 +1576,7 @@ public:
                                                           kernel_outVector,
                                                           gridIdx );
         }
-        TNL_CHECK_CUDA_DEVICE; // FREEZES right here on CHECK CUDA
+        TNL_CHECK_CUDA_DEVICE;
        Devices::Cuda::freeFromDevice( kernel_this );
        Devices::Cuda::freeFromDevice( kernel_inVector );
        Devices::Cuda::freeFromDevice( kernel_outVector );
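The launcher above selects a kernel variant and CUDA block size from matrix.totalLoad. Condensed to just that decision, with the thresholds and block sizes exactly as they appear in the hunks (a sketch only; the real code also launches the matching spmvCuda kernel and manages the device copies):

// Block-size selection mirroring the totalLoad branches above;
// presumably the heavier per-thread workloads of the more aggressively
// unrolled kernels call for smaller blocks.
unsigned int pickBlockSize( int totalLoad )
{
   if( totalLoad < 2 )  return 256;
   if( totalLoad < 4 )  return 192;
   if( totalLoad < 8 )  return 128;
   if( totalLoad < 16 ) return 128;
   return 96;
}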
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h
index bfb8a7e35aa677a1af434efea16d1982b27ec3ca..e20b5cd2307f0b749f18ebd252ab2e55c41422f9 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/BiEllpack_impl.h
@@ -802,10 +802,9 @@ template< typename Real,
 void BiEllpack< Real, Device, Index >::printValues() const
 {
    for( Index i = 0; i < this->values.getSize(); i++ ) {
-      // Random values are stored with the column index of getColumns(). e.g. a matrix has 4 columns, values are at column indexes 0, 1, 2, 3 and junk data at index 4.
-        if( this->columnIndexes.getElement( i ) != this->getColumns() )
-            std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
-                      << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
+      if( this->columnIndexes.getElement( i ) != this->getColumns() )
+         std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i )
+                   << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl;
    }
    for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) {
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 8891f5b935ece70662c2a17d2e7ca67c508af6aa..3164a7fff39bd9095f39b606cad43b8d32e7b898 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -124,41 +124,8 @@ template< typename Real,
 Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
    // TODO: Fix/Implement
-   throw Exceptions::NotImplementedError( "CSR::getNonZeroRowLength is not implemented." );
-//    if( std::is_same< DeviceType, Devices::Host >::value )
-//    {
-//       ConstMatrixRow matrixRow = this->getRow( row );
-//       return matrixRow.getNonZeroElementsCount();
-//    }
-//    if( std::is_same< DeviceType, Devices::Cuda >::value )
-//    {
-//       IndexType *cols = new IndexType[4];
-//       RealType *vals = new RealType[4];
-//       for( int i = 0; i < 4; i++ )
-//       {
-//          cols[i] = i;
-//          vals[i] = 1.0;
-//       }
-//       ConstMatrixRow matrixRow(cols, vals, 4, 1);
-// //       ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
-//       // WHEN debugging with GDB:
-//       // (gdb) p this->rowPointers[0]
-//       //    Could not find operator[].
-//       // (gdb) p rowPointers.getElement(0)
-//       //    Attempt to take address of value not located in memory.
-//       IndexType resultHost ( 0 );
-//       IndexType* resultCuda = Cuda::passToDevice( resultHost );
-//       // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
-//       //          'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
-//       TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
-//       delete []cols;
-//       delete []vals;
-//       std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
-//       resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
-//       std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
-//       Cuda::freeFromDevice( resultCuda );
-//       return resultHost;
-//    }
+   TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
+   return 0;
 }
 
 template< typename Real,
@@ -223,13 +190,6 @@ bool CSR< Real, Device, Index >::addElementFast( const IndexType row,
                                                  const RealType& value,
                                                  const RealType& thisElementMultiplicator )
 {
-   /*TNL_ASSERT( row >= 0 && row < this->rows &&
-               column >= 0 && column <= this->rows,
-              std::cerr << " row = " << row
-                        << " column = " << column
-                        << " this->rows = " << this->rows
-                        << " this->columns = " << this-> columns );*/
-
    IndexType elementPtr = this->rowPointers[ row ];
    const IndexType rowEnd = this->rowPointers[ row + 1 ];
    IndexType col = 0;
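CSR::getNonZeroRowLength() above remains a TODO (the removed experiment never worked on CUDA). On the host the intended result is straightforward: row r owns the slice of the values array from rowPointers[ r ] up to rowPointers[ r + 1 ], and only the elements with non-zero values are counted. A sketch with plain std::vector stand-ins for TNL's arrays (hypothetical free function, not the TNL API):

#include <vector>

template< typename Real, typename Index >
Index nonZeroRowLength( const std::vector< Real >& values,
                        const std::vector< Index >& rowPointers,
                        Index row )
{
   Index count = 0;
   // Scan the row's slice of the values array, delimited by rowPointers.
   for( Index ptr = rowPointers[ row ]; ptr < rowPointers[ row + 1 ]; ptr++ )
      if( values[ ptr ] != Real( 0 ) )
         count++;
   return count;
}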
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 32cfca2c4e68dea5a1ebc0c99753f1890eeacf9b..3826a8574fc302d86f363f1d7e72d58d89526303 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -275,22 +275,7 @@ template< typename Real,
 Index ChunkedEllpack< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
 {
    ConstMatrixRow matrixRow = getRow( row );
-   return matrixRow.getNonZeroElementsCount( getType< Device >() );
-
-//    IndexType elementCount ( 0 );
-//    ConstMatrixRow matrixRow = this->getRow( row );
-//
-//    auto computeNonZeros = [&] /*__cuda_callable__*/ ( IndexType i ) mutable
-//    {
-//       std::cout << "matrixRow.getElementValue( i ) = " << matrixRow.getElementValue( i ) << " != 0.0" << std::endl;
-//       if( matrixRow.getElementValue( i ) != 0.0 )
-//          elementCount++;
-//
-//       std::cout << "End of lambda elementCount = " << elementCount << std::endl;
-//    };
-//
-//    ParallelFor< DeviceType >::exec( ( IndexType ) 0, matrixRow.getLength(), computeNonZeros );
-//    return elementCount;
+   return matrixRow.getNonZeroElementsCount( Device::getDeviceType() );
 }
 
 template< typename Real,
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index d92bccc46b4908708b608bff7e36b974dcbf7131..b99dbc88b59d425e93cc7cde2ee3fe9f31064b0a 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -655,8 +655,6 @@ Ellpack< Real, Device, Index >::operator=( const Ellpack< Real2, Device2, Index2
    // setLike does not work here due to different alignment on Cuda and Host
    this->rowLengths = matrix.rowLengths;
    this->setDimensions( matrix.getRows(), matrix.getColumns() );
-
-//   std::cout << "DIMENSIONS set; after setDimensions in operator= cross-device" << std::endl;
 
    const int blockSize = 32;
    const int blocks = roundUpDivision( this->getRows(), blockSize );
diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h
index b3fb33856a812a0ff82eb26d134f411c62d5ed2f..dd6ddc07245150c859946e97b2fde3a66021aaa3 100644
--- a/src/TNL/Matrices/MatrixReader_impl.h
+++ b/src/TNL/Matrices/MatrixReader_impl.h
@@ -68,14 +68,8 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file,
    if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) )
       return false;
-
-//   std::cout << " rowLengths sizeof: " << sizeof( rowLengths ) << std::endl;
-//   std::cout << " rowLengths element sizeof: " << sizeof( rowLengths[0] ) << std::endl;
-//   std::cout << " rowLengths getSize(): " << rowLengths.getSize() << std::endl;
 
    matrix.setCompressedRowLengths( rowLengths );
-
-//   std::cout << "->CompressedRowLengths SET" << std::endl;
 
    if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) )
       return false;
@@ -347,8 +341,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
    Timer timer;
    timer.start();
 
-//   std::cout << "\nBefore while..." << std::endl;
-
    while( std::getline( file, line ) )
    {
      if( line[ 0 ] == '%' ) continue;
@@ -380,8 +372,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
      }
    }
 
-//   std::cout << "\nAfter while..." << std::endl;
-
    file.clear();
    long int fileSize = file.tellg();
    timer.stop();
@@ -390,8 +380,6 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file,
                 << " -> " << timer.getRealTime() << " sec. i.e. "
                 << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl;
 
-//   std::cout << "->END of reading matrix elements from file" << std::endl;
-
    return true;
 }
diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Sparse_impl.h
index 84d734a937bea3575ecaf737f14d400585fcca92..d1643db19a48dbf078fe04389e9cb2d061b28a26 100644
--- a/src/TNL/Matrices/Sparse_impl.h
+++ b/src/TNL/Matrices/Sparse_impl.h
@@ -109,19 +109,7 @@ template< typename Real,
           typename Index >
 void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements )
 {
-//   std::cout << " Allocating matrix elements..." << std::endl;
-   // CHECKING: if the number of matrix elements is larger than the highest number the IndexType can go to?
-   //           INT OVERFLOW
-
-   // CORRECT? ELL stores certain matrices in such a way, which could cause the number of matrix elements
-   //          to be greater than the maximum value IndexType can store, thus causing int overflow when
-   //          creating the arrays "values" and "indexes".
-   // PROBLEM: int can overflow in such a way that it is still positive, thus rendering this assert useless.
-   // HOW FIX? Do we have to create special conditions for every format in its allocation method? We can't
-   //          tell from within this method, if numberOfMatrixElements is an overflown value or not.
    TNL_ASSERT_GE( numberOfMatrixElements, 0, "Number of matrix elements must be non-negative." );
-
-//   std::cout << " numberOfMatrixElements = " << numberOfMatrixElements << std::endl;
 
    this->values.setSize( numberOfMatrixElements );
    this->columnIndexes.setSize( numberOfMatrixElements );
@@ -132,8 +120,6 @@ void Sparse< Real, Device, Index >::allocateMatrixElements( const IndexType& num
     */
    if( numberOfMatrixElements > 0 )
      this->columnIndexes.setValue( this->columns );
-
-//   std::cout << "->END OF allocateMatrixElements!!!" << std::endl;
 }
 
 template< typename Real,
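The overflow discussion removed from allocateMatrixElements() above raises a real gap: a row count multiplied by a padded row length can wrap around IndexType and still come out positive, which the remaining TNL_ASSERT_GE( ..., 0, ... ) cannot detect. One conventional guard is to form the product in a wider type before narrowing; a sketch (hypothetical helper, not part of TNL, assuming IndexType is narrower than long long):

#include <limits>
#include <stdexcept>

template< typename Index >
Index checkedElementCount( Index rows, Index maxRowLength )
{
   // Multiply in 64 bits so the intermediate cannot wrap for int-sized
   // Index types, then verify the result fits before narrowing back.
   const long long total = static_cast< long long >( rows )
                         * static_cast< long long >( maxRowLength );
   if( total > static_cast< long long >( std::numeric_limits< Index >::max() ) )
      throw std::overflow_error( "number of matrix elements overflows IndexType" );
   return static_cast< Index >( total );
}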
diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h
index e870cd90545656d005066eade0c5f2a07773798b..8d9e9c727a0d88c40b90f0623d8b2ec8808e3f95 100644
--- a/src/UnitTests/Matrices/DenseMatrixTest.h
+++ b/src/UnitTests/Matrices/DenseMatrixTest.h
@@ -41,17 +41,14 @@ void host_test_GetType()
    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) );
 }
 
-// QUESITON: Cant these two functions be combined into one? Because if no CUDA is present and we were to call
-// CUDA into the function in the TEST, to be tested, then we could have a problem.
-
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
 void cuda_test_GetType()
 {
    MatrixCudaFloat mtrxCudaFloat;
    MatrixCudaInt mtrxCudaInt;
 
-   EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); // This is mistakenly labeled in /src/TNL/Devices/Cuda.cpp
-   EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) ); // Should be Devices::Cuda
+   EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) );
+   EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) );
 }
 
 template< typename Matrix >
@@ -1397,7 +1394,6 @@ TYPED_TEST( MatrixTest, printTest )
 
 TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 {
-//    test_GetMatrixProduct< Dense_host_int >();
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
@@ -1414,7 +1410,6 @@ TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host )
 #ifdef HAVE_CUDA
 TEST( DenseMatrixTest, Dense_getMatrixProductTest_Cuda )
 {
-//    test_GetMatrixProduct< Dense_cuda_int >();
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 8beaa5b296985c41107c7a881eea81d832f04a0c..ef5b28d240a65c5e26eb987c42b76688c59a8d87 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -28,12 +28,6 @@ void host_test_GetType()
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
-
-//    MatrixHostFloat mtrxHostFloat;
-//    MatrixHostInt mtrxHostInt;
-//
-//    EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::CSR< float, Devices::Host >" ) );
-//    EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::CSR< int, Devices::Host >" ) );
 }
 
 template< typename MatrixCudaFloat, typename MatrixCudaInt >
@@ -42,13 +36,7 @@ void cuda_test_GetType()
    bool testRan = false;
    EXPECT_TRUE( testRan );
    std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n";
-    std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
-
-//    MatrixCudaFloat mtrxCudaFloat;
-//    MatrixCudaInt mtrxCudaInt;
-//
-//    EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::CSR< float, Devices::Cuda >" ) );
-//    EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::CSR< int, Devices::Cuda >" ) );
+   std::cerr << "This test has not been implemented properly yet.\n" << std::endl;
 }
 
 template< typename Matrix >
@@ -224,7 +212,6 @@ void test_SetElement()
 
    typename Matrix::CompressedRowLengthsVector rowLengths;
    rowLengths.setSize( rows );
-//    rowLengths.setValue( 8 );
    rowLengths.setElement( 0, 4 );
    rowLengths.setElement( 1, 3 );
    rowLengths.setElement( 2, 8 );
@@ -614,7 +601,6 @@ void test_VectorProduct()
    using IndexType = typename Matrix::IndexType;
    using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
 
-// Matrix totalLoad (AdEll) = 1
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -668,7 +654,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_1.getElement( 3 ), 10 );
 
-// Matrix totalLoad (AdEll) = 2
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -723,7 +708,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_2.getElement( 3 ), 16 );
 
-// Matrix totalLoad (AdEll) = 3
    /*
    * Sets up the following 4x4 sparse matrix:
    *
@@ -777,7 +761,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_3.getElement( 3 ), 66 );
 
-// Matrix totalLoad (AdEll) = 4
    /*
    * Sets up the following 8x8 sparse matrix:
    *
@@ -856,7 +839,6 @@ void test_VectorProduct()
 
    EXPECT_EQ( outVector_4.getElement( 7 ), 330 );
 
-// Matrix totalLoad (AdEll) = 5
    /*
    * Sets up the following 8x8 sparse matrix:
    *
@@ -942,9 +924,6 @@ void test_VectorProduct()
    EXPECT_EQ( outVector_5.getElement( 5 ), 224 );
    EXPECT_EQ( outVector_5.getElement( 6 ), 352 );
    EXPECT_EQ( outVector_5.getElement( 7 ), 520 );
-
-
-   // ONE MORE TEST HERE FOR 16X16
 }
 
 template< typename Matrix >
@@ -1054,19 +1033,6 @@ void test_OperatorEquals()
    *    |  22  23  24  25  26  27  28   1 |  8
    *    \  29  30  31  32  33  34  35  36 /  8
    */
-
-   /* Sorted BiELL:
-    *
-    *
-    *    /  22  23  24  25  26  27  28   1 \
-    *    |  29  30  31  32  33  34  35  36 |
-    *    |  16  17  18  19  20  21   1     |
-    *    |   1   2   3   4   5   1         |
-    *    |  11  12  13  14   1             |
-    *    |   8   9  10   1                 |
-    *    |   6   7   1                     |
-    *    \  15   1                         /
-    */
 
    const IndexType m_rows = 8;
    const IndexType m_cols = 8;
@@ -1453,7 +1419,6 @@ void test_Print()
 
    std::cout.rdbuf(old_buf); //reset
 
-   //printed << printed.str() << std::endl;
    couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n"
             "Row: 1 -> Col:3->4\t\n"
             "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n"
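test_Print() above verifies the printed output by swapping std::cout's stream buffer and restoring it afterwards (std::cout.rdbuf(old_buf); //reset). The technique in isolation:

#include <iostream>
#include <sstream>

int main()
{
   std::stringstream captured;

   // Redirect std::cout into the stringstream; rdbuf() returns the
   // previous buffer so it can be restored later.
   std::streambuf* old_buf = std::cout.rdbuf( captured.rdbuf() );
   std::cout << "hello";                      // goes into 'captured'
   std::cout.rdbuf( old_buf );                // reset

   std::cout << captured.str() << std::endl;  // prints "hello"
}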
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
index 33a2403bc6563dff52f27da860b3dcdd71bb81a9..aac3a41a8fc895aa0c48136ad6542c69c45cb839 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h
@@ -78,7 +78,6 @@ TYPED_TEST( AdEllpackMatrixTest, resetTest )
    test_Reset< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - localLoad, offset and rowOffset are seemingly random numbers in the head and tail of WarpList.
 TYPED_TEST( AdEllpackMatrixTest, setElementTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -86,7 +85,6 @@ TYPED_TEST( AdEllpackMatrixTest, setElementTest )
    test_SetElement< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - localLoad, offset and rowOffset are seemingly random numbers in the head and tail of WarpList.
 TYPED_TEST( AdEllpackMatrixTest, addElementTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -94,7 +92,6 @@ TYPED_TEST( AdEllpackMatrixTest, addElementTest )
    test_AddElement< AdEllpackMatrixType >();
 }
 
-// SUPPOSEDLY WORKING - Tests take longer than expected. setElement takes 13ms, compared to SlicedEllpack's 2ms.
 TYPED_TEST( AdEllpackMatrixTest, setRowTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -102,7 +99,6 @@ TYPED_TEST( AdEllpackMatrixTest, setRowTest )
    test_SetRow< AdEllpackMatrixType >();
 }
 
-// WORKS FOR MATRICES up to 99x99, The rest have different results.
 TYPED_TEST( AdEllpackMatrixTest, vectorProductTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
@@ -110,7 +106,6 @@ TYPED_TEST( AdEllpackMatrixTest, vectorProductTest )
    test_VectorProduct< AdEllpackMatrixType >();
 }
 
-// TODO test
 TYPED_TEST( AdEllpackMatrixTest, operatorEqualsTest )
 {
    using AdEllpackMatrixType = typename TestFixture::AdEllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
index e8b4412559af2fff19c8c95330f7a0cfb2ff6d80..c55eb101f009e3b39ab3bf90d3b809d0d608d0ec 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h
@@ -57,7 +57,6 @@ using BiEllpackMatrixTypes = ::testing::Types
 
 TYPED_TEST_SUITE( BiEllpackMatrixTest, BiEllpackMatrixTypes);
 
-// WORKING
 TYPED_TEST( BiEllpackMatrixTest, setDimensionsTest )
 {
    using BiEllpackMatrixType = typename TestFixture::BiEllpackMatrixType;
diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
index 91bd7fa9790e03e6e2a20918057c2e6c8a10dd48..5ef97a1df004095479864ab9d4ddc89fdbb54a4d 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h
@@ -24,10 +24,6 @@ protected:
    using ChunkedEllpackMatrixType = Matrix;
 };
 
-// columnIndexes of ChunkedEllpack appear to be broken, when printed, it prints out a bunch of 4s.
-// rowPointers have interesting elements? 0 18 36 42 54 72 96 126 162 204 256 when rows = 10, cols = 11; rowLengths = 3 3 1 2 3 4 5 6 7 8
-// and 0 52 103 154 205 256 when rows = 5, cols = 4; rowLengths = 3 3 3 3 3
-
 // types for which MatrixTest is instantiated
 using ChEllpackMatrixTypes = ::testing::Types