diff --git a/src/Benchmarks/Benchmarks.h b/src/Benchmarks/Benchmarks.h index 4caf0fbda397f92d8cb7c143a12896b89600beb0..cbd628b03e13f9c792e2b9dd90b5573ea3ea4568 100644 --- a/src/Benchmarks/Benchmarks.h +++ b/src/Benchmarks/Benchmarks.h @@ -51,7 +51,13 @@ struct BenchmarkResult virtual RowElements getRowElements() const { - return RowElements({ time, stddev, stddev / time, bandwidth, speedup }); + RowElements elements; + elements << time << stddev << stddev / time << bandwidth; + if( speedup != 0 ) + elements << speedup; + else + elements << "N/A"; + return elements; } }; diff --git a/src/Benchmarks/LinearSolvers/benchmarks.h b/src/Benchmarks/LinearSolvers/benchmarks.h index 7b22cdfc1ef30aaddc29cc35067962ed4530f257..a4c04578d5553f2f039a8e7fc575de0ad116c48d 100644 --- a/src/Benchmarks/LinearSolvers/benchmarks.h +++ b/src/Benchmarks/LinearSolvers/benchmarks.h @@ -160,8 +160,14 @@ benchmarkSolver( Benchmark& benchmark, r = b - r; const double residue_true = lpNorm( r, 2.0 ) / lpNorm( b, 2.0 ); - return RowElements({ time, stddev, stddev/time, speedup, (double) converged, (double) iterations, - residue_precond, residue_true }); + RowElements elements; + elements << time << stddev << stddev/time; + if( speedup != 0 ) + elements << speedup; + else + elements << "N/A"; + elements << ( converged ? "yes" : "no" ) << iterations << residue_precond << residue_true; + return elements; } }; MyBenchmarkResult benchmarkResult( solver, matrix, x, b ); diff --git a/src/Benchmarks/Logging.h b/src/Benchmarks/Logging.h index 61608d364e769fe4b6e68a0691957745ee805496..fb4426bb13daa9f59e2518c1ed11a971ccd525ab 100644 --- a/src/Benchmarks/Logging.h +++ b/src/Benchmarks/Logging.h @@ -25,6 +25,55 @@ namespace TNL { namespace Benchmarks { +class LoggingRowElements +{ + public: + + LoggingRowElements() + { + stream << std::setprecision( 6 ) << std::fixed; + } + + template< typename T > + LoggingRowElements& operator << ( const T& b ) + { + stream << b; + elements.push_back( stream.str() ); + stream.str( std::string() ); + return *this; + } + + LoggingRowElements& operator << ( decltype( std::setprecision( 2 ) )& setprec ) + { + stream << setprec; + return *this; + } + + LoggingRowElements& operator << ( decltype( std::fixed )& setfixed ) // the same works also for std::scientific + { + stream << setfixed; + return *this; + } + + // iterators + auto begin() noexcept { return elements.begin(); } + + auto begin() const noexcept { return elements.begin(); } + + auto cbegin() const noexcept { return elements.cbegin(); } + + auto end() noexcept { return elements.end(); } + + auto end() const noexcept { return elements.end(); } + + auto cend() const noexcept { return elements.cend(); } + + protected: + std::list< String > elements; + + std::stringstream stream; +}; + class Logging { public: @@ -33,7 +82,7 @@ public: using MetadataColumns = std::vector<MetadataElement>; using HeaderElements = std::vector< String >; - using RowElements = std::vector< double >; + using RowElements = LoggingRowElements; Logging( int verbose = true ) : verbose(verbose) @@ -131,9 +180,7 @@ public: // spanning element is printed as usual column to stdout std::cout << std::setw( 15 ) << spanningElement; for( auto & it : subElements ) { - std::cout << std::setw( 15 ); - if( it != 0.0 )std::cout << it; - else std::cout << "N/A"; + std::cout << std::setw( 15 ) << it; } std::cout << std::endl; } @@ -147,8 +194,7 @@ public: // benchmark data are indented const String indent = " "; for( auto & it : subElements ) { - if( it != 0.0 ) log << indent << it << 
std::endl; - else log << indent << "N/A" << std::endl; + log << indent << it << std::endl; } } diff --git a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h index 699be6efd1be5a06f978dfdc8da6d8dbed71add2..02ef6b61220fa404a704f36443f936c64b51174f 100644 --- a/src/Benchmarks/SpMV/SpmvBenchmarkResult.h +++ b/src/Benchmarks/SpMV/SpmvBenchmarkResult.h @@ -15,37 +15,42 @@ namespace TNL { namespace Benchmarks { -template< typename Real = double, - typename Index = int > +template< typename Real, + typename Device, + typename Index > struct SpmvBenchmarkResult : public BenchmarkResult { using RealType = Real; + using DeviceType = Device; using IndexType = Index; using HostVector = Containers::Vector< Real, Devices::Host, Index >; - using CudaVector = Containers::Vector< Real, Devices::Cuda, Index >; + using BenchmarkVector = Containers::Vector< Real, Device, Index >; - SpmvBenchmarkResult( CudaVector& cudaResult, HostVector& hostResult, CudaVector& cusparseResult ) - : hostResult( hostResult ), cudaResult( cudaResult), cusparseResult( cusparseResult ){}; + SpmvBenchmarkResult( const HostVector& csrResult, const BenchmarkVector& benchmarkResult ) + : csrResult( csrResult ), benchmarkResult( benchmarkResult ){}; virtual HeaderElements getTableHeader() const override { - return HeaderElements({"time", "stddev", "stddev/time", "speedup", "Host.Diff.Max", "Host.Diff.L2", "Cusparse.Diff.Max", "Cusparse.Diff.L2"}); + return HeaderElements( {"time", "stddev", "stddev/time", "bandwidth", "speedup", "CSR Diff.Max", "CSR Diff.L2"} ); } virtual RowElements getRowElements() const override { - HostVector cudaCopy, cusparseCopy, a, b; - cudaCopy = cudaResult; - cusparseCopy = cusparseResult; - a = cudaCopy - hostResult; - b = cudaCopy - cusparseCopy; - return RowElements({ time, stddev, stddev/time, speedup, max( abs( a ) ), lpNorm( a, 2.0 ), max( abs( b ) ), lpNorm( b, 2.0 ) }); + HostVector benchmarkResultCopy; + benchmarkResultCopy = benchmarkResult; + auto diff = csrResult - benchmarkResultCopy; + RowElements elements; + elements << time << stddev << stddev/time << bandwidth; + if( speedup != 0.0 ) + elements << speedup; + else elements << "N/A"; + elements << max( abs( diff ) ) << lpNorm( diff, 2.0 ); + return elements; } - HostVector &hostResult; - - CudaVector &cudaResult, &cusparseResult; + const HostVector& csrResult; + const BenchmarkVector& benchmarkResult; }; } //namespace Benchmarks diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv-legacy.h similarity index 57% rename from src/Benchmarks/SpMV/spmv.h rename to src/Benchmarks/SpMV/spmv-legacy.h index 8851e4114362024953255ce7a7b1b82322b2bab6..b7bdba6ca973e898d08cac7fda991b98aba4eb59 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv-legacy.h @@ -38,7 +38,8 @@ using namespace TNL::Matrices; #include "cusparseCSRMatrix.h" namespace TNL { -namespace Benchmarks { + namespace Benchmarks { + namespace SpMVLegacy { // Alias to match the number of template parameters with other formats template< typename Real, typename Device, typename Index > @@ -111,142 +112,66 @@ template< typename Real, template< typename, typename, typename, typename > class Vector = Containers::Vector > void benchmarkSpMV( Benchmark& benchmark, + const TNL::Containers::Vector< Real, Devices::Host, int >& csrResultVector, const String& inputFileName, bool verboseMR ) { - // Setup CSR for cuSPARSE. 
It will compared to the format given as a template parameter to this function - using CSR_HostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; - using CSR_DeviceMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; - - CSR_HostMatrix CSRhostMatrix; - CSR_DeviceMatrix CSRdeviceMatrix; - - // Read the matrix for CSR, to set up cuSPARSE - MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ); - -#ifdef HAVE_CUDA - // cuSPARSE handle setup - cusparseHandle_t cusparseHandle; - cusparseCreate( &cusparseHandle ); - - // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device - CSRdeviceMatrix = CSRhostMatrix; - - // Delete the CSRhostMatrix, so it doesn't take up unnecessary space - CSRhostMatrix.reset(); - - // Initialize the cusparseCSR matrix. - TNL::CusparseCSR< Real > cusparseCSR; - cusparseCSR.init( CSRdeviceMatrix, &cusparseHandle ); -#endif - - // Setup the format which is given as a template parameter to this function - typedef Matrix< Real, Devices::Host, int > HostMatrix; - typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix; - typedef Containers::Vector< Real, Devices::Host, int > HostVector; - typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector; + using HostMatrix = Matrix< Real, Devices::Host, int >; + using CudaMatrix = Matrix< Real, Devices::Cuda, int >; + using HostVector = Containers::Vector< Real, Devices::Host, int >; + using CudaVector = Containers::Vector< Real, Devices::Cuda, int >; HostMatrix hostMatrix; - DeviceMatrix deviceMatrix; - HostVector hostVector, hostVector2; - CudaVector deviceVector, deviceVector2, cusparseVector; + CudaMatrix cudaMatrix; - // Load the format MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); - - // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), - // because we need the matrix loaded first to get the rows and columns benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) }, { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, - { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) } + { "matrix format", MatrixInfo< HostMatrix >::getFormat() } } )); + const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); + const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; + benchmark.setOperation( datasetSize ); - hostVector.setSize( hostMatrix.getColumns() ); - hostVector2.setSize( hostMatrix.getRows() ); + /*** + * Benchmark SpMV on host + */ + HostVector hostInVector( hostMatrix.getColumns() ), hostOutVector( hostMatrix.getRows() ); -#ifdef HAVE_CUDA - deviceMatrix = hostMatrix; - deviceVector.setSize( hostMatrix.getColumns() ); - deviceVector2.setSize( hostMatrix.getRows() ); - cusparseVector.setSize( hostMatrix.getRows() ); -#endif - - // reset function auto resetHostVectors = [&]() { - hostVector = 1.0; - hostVector2 = 0.0; - }; -#ifdef HAVE_CUDA - auto resetCudaVectors = [&]() { - deviceVector = 1.0; - deviceVector2 = 0.0; + hostInVector = 1.0; + hostOutVector = 0.0; }; - auto resetCusparseVectors = [&]() { - deviceVector = 1.0; - cusparseVector == 0.0; - }; - #endif - - const int elements = hostMatrix.getNumberOfNonzeroMatrixElements(); - const double datasetSize = (double) 
elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; - // compute functions auto spmvHost = [&]() { - hostMatrix.vectorProduct( hostVector, hostVector2 ); - }; -#ifdef HAVE_CUDA - auto spmvCuda = [&]() { - deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); - }; + hostMatrix.vectorProduct( hostInVector, hostOutVector ); - auto spmvCusparse = [&]() { - cusparseCSR.vectorProduct( deviceVector, cusparseVector ); }; -#endif - - benchmark.setOperation( datasetSize ); - benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost ); - - // Initialize the host vector to be compared. - // (The values in hostVector2 will be reset when spmvCuda starts) - HostVector resultHostVector2; - resultHostVector2.setSize( hostVector2.getSize() ); - resultHostVector2.setValue( 0.0 ); - - // Copy the values - resultHostVector2 = hostVector2; + SpmvBenchmarkResult< Real, Devices::Host, int > hostBenchmarkResults( csrResultVector, hostOutVector ); + benchmark.time< Devices::Host >( resetHostVectors, "CPU", spmvHost, hostBenchmarkResults ); + /*** + * Benchmark SpMV on CUDA + */ #ifdef HAVE_CUDA - benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda ); - - // Initialize the device vector to be compared. - // (The values in deviceVector2 will be reset when spmvCusparse starts) - HostVector resultDeviceVector2; - resultDeviceVector2.setSize( deviceVector2.getSize() ); - resultDeviceVector2.setValue( 0.0 ); - - resultDeviceVector2 = deviceVector2; - - // Setup cuSPARSE MetaData, since it has the same header as CSR, - // and therefore will not get its own headers (rows, cols, speedup etc.) in log. - // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten. - benchmark.setMetadataColumns( Benchmark::MetadataColumns({ - { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) }, - { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, - { "rows", convertToString( hostMatrix.getRows() ) }, - { "columns", convertToString( hostMatrix.getColumns() ) }, - { "matrix format", convertToString( "CSR-cuSPARSE-" + getFormatShort( hostMatrix ) ) } - } )); + cudaMatrix = hostMatrix; + CudaVector cudaInVector( hostMatrix.getColumns() ), cudaOutVector( hostMatrix.getRows() ); - SpmvBenchmarkResult< Real, int > benchmarkResult( deviceVector2, hostVector2, cusparseVector ); - benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, benchmarkResult ); + auto resetCudaVectors = [&]() { + cudaInVector = 1.0; + cudaOutVector = 0.0; + }; + auto spmvCuda = [&]() { + cudaMatrix.vectorProduct( cudaInVector, cudaOutVector ); + }; + SpmvBenchmarkResult< Real, Devices::Cuda, int > cudaBenchmarkResults( csrResultVector, cudaOutVector ); + benchmark.time< Devices::Cuda >( resetCudaVectors, "GPU", spmvCuda, cudaBenchmarkResults ); #endif - std::cout << std::endl; } @@ -257,27 +182,96 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, const String& inputFileName, bool verboseMR ) { - benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); - - benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); - - benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, 
verboseMR ); - benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); - benchmarkSpMV< Real, Matrices::Legacy::BiEllpack >( benchmark, inputFileName, verboseMR ); + using CSRHostMatrix = Matrices::Legacy::CSR< Real, Devices::Host, int >; + using CSRCudaMatrix = Matrices::Legacy::CSR< Real, Devices::Cuda, int >; + using HostVector = Containers::Vector< Real, Devices::Host, int >; + using CudaVector = Containers::Vector< Real, Devices::Cuda, int >; + + CSRHostMatrix csrHostMatrix; + CSRCudaMatrix csrCudaMatrix; + + //// + // Set-up benchmark datasize + // + MatrixReader< CSRHostMatrix >::readMtxFile( inputFileName, csrHostMatrix, verboseMR ); + const int elements = csrHostMatrix.getNumberOfNonzeroMatrixElements(); + const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB; + benchmark.setOperation( datasetSize ); + + //// + // Perform benchmark on host with CSR as a reference CPU format + // + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) }, + { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, + { "rows", convertToString( csrHostMatrix.getRows() ) }, + { "columns", convertToString( csrHostMatrix.getColumns() ) }, + { "matrix format", String( "CSR" ) } + } )); + + HostVector hostInVector( csrHostMatrix.getRows() ), hostOutVector( csrHostMatrix.getRows() ); + + auto resetHostVectors = [&]() { + hostInVector = 1.0; + hostOutVector == 0.0; + }; + + auto spmvCSRHost = [&]() { + csrHostMatrix.vectorProduct( hostInVector, hostOutVector ); + }; + + benchmark.time< Devices::Cuda >( resetHostVectors, "CPU", spmvCSRHost ); //// - // Segments based sparse matrices - - + // Perform benchmark on CUDA device with cuSparse as a reference GPU format // +#ifdef HAVE_CUDA + benchmark.setMetadataColumns( Benchmark::MetadataColumns({ + { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) }, + { "non-zeros", convertToString( csrHostMatrix.getNumberOfNonzeroMatrixElements() ) }, + { "rows", convertToString( csrHostMatrix.getRows() ) }, + { "columns", convertToString( csrHostMatrix.getColumns() ) }, + { "matrix format", String( "cuSparse" ) } + } )); + + cusparseHandle_t cusparseHandle; + cusparseCreate( &cusparseHandle ); + + csrCudaMatrix = csrHostMatrix; + + // Delete the CSRhostMatrix, so it doesn't take up unnecessary space + csrHostMatrix.reset(); + + TNL::CusparseCSR< Real > cusparseMatrix; + cusparseMatrix.init( csrCudaMatrix, &cusparseHandle ); + + CudaVector cusparseInVector( csrCudaMatrix.getColumns() ), cusparseOutVector( csrCudaMatrix.getRows() ); + + auto resetCusparseVectors = [&]() { + cusparseInVector = 1.0; + cusparseOutVector == 0.0; + }; + + auto spmvCusparse = [&]() { + cusparseMatrix.vectorProduct( cusparseInVector, cusparseOutVector ); + }; + + benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse ); +#endif - // AdEllpack is broken - // benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); - //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_Ellpack >( 
benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::Legacy::BiEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + /* AdEllpack is broken + benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, hostOutVector, inputFileName, verboseMR ); + */ } +} // namespace SpMVLegacy } // namespace Benchmarks } // namespace TNL diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index 4c6aea68ed065d331fb0f34663c128f132ecb6e3..d8e2003fb5f9e3932d0964696ebf828b429f8f01 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -20,7 +20,7 @@ #include <Benchmarks/BLAS/array-operations.h> #include <Benchmarks/BLAS/vector-operations.h> -#include "spmv.h" +#include "spmv-legacy.h" #include <TNL/Matrices/MatrixReader.h> using namespace TNL::Matrices; @@ -37,14 +37,14 @@ runSpMVBenchmarks( Benchmark & benchmark, const String & inputFileName, bool verboseMR = false ) { - const String precision = getType< Real >(); - metadata["precision"] = precision; - - // Sparse matrix-vector multiplication - benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", - metadata ); - // Start the actual benchmark in spmv.h - benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, verboseMR ); + const String precision = getType< Real >(); + metadata["precision"] = precision; + + // Sparse matrix-vector multiplication + benchmark.newBenchmark( String("Sparse matrix-vector multiplication (") + precision + ")", + metadata ); + // Start the actual benchmark in spmv.h + SpMVLegacy::benchmarkSpmvSynthetic< Real >( benchmark, inputFileName, verboseMR ); } // Get current date time to have different log files names and avoid overwriting. diff --git a/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py new file mode 100755 index 0000000000000000000000000000000000000000..5b44acc1f1faf0a55dca9f7adee14b3f2ac9ed99 --- /dev/null +++ b/src/Benchmarks/scripts/tnl-spmv-benchmark-make-tables.py @@ -0,0 +1,217 @@ +#! 
/usr/bin/env python3 + +import os +import re +import math +import pandas + +from TNL.LogParser import LogParser + +#pandas.options.display.float_format = "{:.2f}".format +pandas.options.display.float_format = "{:.2e}".format +pandas.options.display.width = 0 # auto-detect terminal width for formatting +pandas.options.display.max_rows = None + +def parse_file(fname): + parser = LogParser() + for metadata, df in parser.readFile(fname): + yield df + +def calculate_efficiency(df, nodes_col_index, base_column=None): + if base_column is None: + base_column = df[df.columns[0]] + eff_rows = [] + for i in df.index: + row = df.loc[i] + eff_row = row.copy() + eff_idx = ("eff", *row.name[1:]) + base = base_column[i] + for j in row.index: + if isinstance(j, int): + n = j + else: + n = j[nodes_col_index] + eff_row[j] = base / row[j] / n + eff_rows.append(eff_row) + eff_df = pandas.DataFrame(eff_rows) + eff_df.index = pandas.MultiIndex.from_tuples(eff_df.index) + eff_df = eff_df.rename(index={"time": "eff"}) + return df.append(eff_df) + +log_files = ["sparse-matrix-benchmark.log"] +print( "Parsing log file..." ) + +dfs = [] +for f in log_files: + for df in parse_file(f): + dfs.append(df) + +df = pandas.concat(dfs) + +## Post-processing +print( "Postprocessing data frame..." ) +# Drop norms of results differences +#df.drop(columns=['CSR Diff.Max','CSR Diff.L2'], axis=1, level=1, inplace=True ) + +# show matrix formats as columns +df = df.unstack() +df = df.reorder_levels([2, 0, 1], axis=1) +df.sort_index(axis=1, inplace=True) + +# Drop CPU speedup +df.drop(columns=('BiEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('CSR', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('CSR Legacy', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('ChunkedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('Ellpack', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('Ellpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('SlicedEllpack', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('SlicedEllpack Legacy', 'CPU','speedup'), axis=1, inplace=True ) +df.drop(columns=('cuSparse', 'CPU'), axis=1, inplace=True ) + +print( "Computing speed-up of formats...") +# Add speedup compared to CSR and cuSparse +df["BiEllpack Legacy", "CPU", "CSR speedup"] = df["BiEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] +df["BiEllpack Legacy", "GPU", "cuSparse speedup"] = df["BiEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["CSR", "GPU", "cuSparse speedup"] = df["CSR", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["CSR Legacy", "GPU", "cuSparse speedup"] = df["CSR Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["ChunkedEllpack Legacy", "CPU", "CSR speedup"] = df["ChunkedEllpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] +df["ChunkedEllpack Legacy", "GPU", "cuSparse speedup"] = df["ChunkedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["Ellpack Legacy", "CPU", "CSR speedup"] = df["Ellpack Legacy", "CPU", "time"] / df["CSR", "CPU", "time"] +df["Ellpack Legacy", "GPU", "cuSparse speedup"] = df["Ellpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["Ellpack", "CPU", "CSR speedup"] = df["Ellpack", "CPU", "time"] / df["CSR", "CPU", "time"] +df["Ellpack", "GPU", "cuSparse speedup"] = df["Ellpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["SlicedEllpack Legacy", "CPU", "CSR speedup"] = df["SlicedEllpack Legacy", "CPU", "time"] / df["CSR", 
"CPU", "time"] +df["SlicedEllpack Legacy", "GPU", "cuSparse speedup"] = df["SlicedEllpack Legacy", "GPU", "time"] / df["cuSparse", "GPU", "time"] +df["SlicedEllpack", "CPU", "CSR speedup"] = df["SlicedEllpack", "CPU", "time"] / df["CSR", "CPU", "time"] +df["SlicedEllpack", "GPU", "cuSparse speedup"] = df["SlicedEllpack", "GPU", "time"] / df["cuSparse", "GPU", "time"] + +# Add speedup compared to legacy formats +df["CSR", "GPU", "Legacy speedup"] = df["CSR", "GPU", "time"] / df["CSR Legacy", "GPU", "time"] +df["CSR", "CPU", "Legacy speedup"] = df["CSR", "CPU", "time"] / df["CSR Legacy", "CPU", "time"] +df["Ellpack", "GPU", "Legacy speedup"] = df["Ellpack", "GPU", "time"] / df["Ellpack Legacy", "GPU", "time"] +df["Ellpack", "CPU", "Legacy speedup"] = df["Ellpack", "CPU", "time"] / df["Ellpack Legacy", "CPU", "time"] +df["SlicedEllpack", "GPU", "Legacy speedup"] = df["SlicedEllpack", "GPU", "time"] / df["SlicedEllpack Legacy", "GPU", "time"] +df["SlicedEllpack", "CPU", "Legacy speedup"] = df["SlicedEllpack", "CPU", "time"] / df["SlicedEllpack Legacy", "CPU", "time"] + +print( "Exporting data frame to log.html..." ) +pandas.options.display.float_format = '{:,.4f}'.format +df.to_html("log.html") + +# extract columns of reference formats on GPU +print( "Preparing data for graph analysis..." ) +df['cuSparse-bandwidth']=df['cuSparse','GPU','bandwidth'] +df['ellpack-bandwidth']=df['Ellpack','GPU','bandwidth'] +df['sliced-ellpack-bandwidth']=df['SlicedEllpack','GPU','bandwidth'] + +# sort by cuSparse +df.sort_values(by=["cuSparse-bandwidth"],inplace=True,ascending=False) +cuSparse_list = df['cuSparse-bandwidth'].tolist() +cuSparse_ellpack_gpu_list = df["Ellpack", "GPU", "bandwidth"].tolist(); +cuSparse_ellpack_legacy_gpu_list = df["Ellpack Legacy", "GPU", "bandwidth"].tolist(); +cuSparse_sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist(); +cuSparse_sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist(); +cuSparse_chunked_ellpack_legacy_gpu_list = df["ChunkedEllpack Legacy", "GPU", "bandwidth"].tolist(); +cuSparse_bi_ellpack_legacy_gpu_list = df["BiEllpack Legacy", "GPU", "bandwidth"].tolist(); + +# sort by Ellpack +df.sort_values(by=["ellpack-bandwidth"],inplace=True,ascending=False) +ellpack_gpu_list = df["Ellpack", "GPU", "bandwidth"].tolist(); +ellpack_legacy_gpu_list = df["Ellpack Legacy", "GPU", "bandwidth"].tolist(); + +# sort by SlicedEllpack +df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False) +df.sort_values(by=["sliced-ellpack-bandwidth"],inplace=True,ascending=False) +sliced_ellpack_gpu_list = df["SlicedEllpack", "GPU", "bandwidth"].tolist(); +sliced_ellpack_legacy_gpu_list = df["SlicedEllpack Legacy", "GPU", "bandwidth"].tolist(); + +print( "Writing gnuplot files..." 
) + +cuSparse_file = open( "cusparse.gplt", "w" ) +i = 0 +for x in cuSparse_list: + if str( x ) != "nan": + if str( cuSparse_ellpack_gpu_list[ i ] ) != "nan" and str( cuSparse_ellpack_legacy_gpu_list[ i ] ) != "nan": + cuSparse_file.write( f"{i+1} {x} " ) + cuSparse_file.write( f"{cuSparse_ellpack_gpu_list[ i ]} {cuSparse_ellpack_legacy_gpu_list[ i ]} " ) + cuSparse_file.write( f"{cuSparse_sliced_ellpack_gpu_list[ i ]} {cuSparse_sliced_ellpack_legacy_gpu_list[ i ]} " ) + cuSparse_file.write( f"{cuSparse_chunked_ellpack_legacy_gpu_list[ i ]} {cuSparse_bi_ellpack_legacy_gpu_list[ i ]}\n" ) + i = i + 1 +cuSparse_file.close() + +ellpack_file = open( "ellpack.gplt", "w" ) +i = 0; +for x in ellpack_gpu_list: + if str( x ) != "nan": + if str( ellpack_legacy_gpu_list[ i ] ) != "nan": + ellpack_file.write( f"{i+1} {x} {ellpack_legacy_gpu_list[ i ]}\n" ) + i = i + 1 +ellpack_file.close() + +sliced_ellpack_file = open( "sliced-ellpack.gplt", "w" ) +i = 0; +for x in sliced_ellpack_gpu_list: + if str( x ) != "nan": + if str( sliced_ellpack_legacy_gpu_list[ i ] ) != "nan": + sliced_ellpack_file.write( f"{i+1} {x} {sliced_ellpack_legacy_gpu_list[ i ]}\n" ) + i = i + 1 +ellpack_file.close() + +print( "Generating Gnuplot file..." ) + +gnuplot_file = open( "gnuplot.gplt", "w" ) +# NOTE: """...""" allows multi-line strings, r"..." disables backslash-escaping (so a single \ is just a \ in the output) +gnuplot_file.write( r""" +set terminal postscript lw 3 20 color +set grid +set xlabel 'Matrix' +set xtics 250 +set ylabel 'Bandwidth GB/sec' +set output 'ellpack-vs-cusparse.eps' +plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \ + 'cusparse.gplt' using 1:3 title 'Ellpack' with dots linewidth 2 lt rgb 'green', \ + 'cusparse.gplt' using 1:4 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue' +""") +# TODO: formatting like ^ +gnuplot_file.write( "set output 'sliced-ellpack-vs-cusparse.eps'\n" ) +gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) +gnuplot_file.write( " 'cusparse.gplt' using 1:5 title 'Sliced Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) +gnuplot_file.write( " 'cusparse.gplt' using 1:6 title 'Sliced Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) +gnuplot_file.write( "set output 'chunked-ellpack-vs-cusparse.eps'\n" ) +gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) +#gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack' with dots linewidth 2 lt rgb 'green',\\\n" ) +gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'Chunked Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) +gnuplot_file.write( "set output 'bi-ellpack-vs-cusparse.eps'\n" ) +gnuplot_file.write( "plot 'cusparse.gplt' using 1:2 title 'cuSparse' with lines linewidth 2 lt rgb 'red', \\\n" ) +#gnuplot_file.write( " 'cusparse.gplt' using 1:7 title 'BiEllpack' with dots linewidth 2 lt rgb 'green',\\\n" ) +gnuplot_file.write( " 'cusparse.gplt' using 1:8 title 'BiEllpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) +gnuplot_file.write( "set output 'ellpack-vs-ellpack-legacy.eps'\n" ) +gnuplot_file.write( "plot 'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) +gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) +gnuplot_file.write( "set output 'sliced-ellpack-vs-sliced-ellpack-legacy.eps'\n" ) +gnuplot_file.write( "plot 
'ellpack.gplt' using 1:2 title 'Ellpack' with lines linewidth 2 lt rgb 'red', \\\n" ) +gnuplot_file.write( " 'ellpack.gplt' using 1:3 title 'Ellpack Legacy' with dots linewidth 2 lt rgb 'blue'\n" ) +gnuplot_file.close() + +print( "Executing Gnuplot ..." ) +os.system( "gnuplot gnuplot.gplt" ) + +print( "Converting files to PDF ..." ) +os.system( "epstopdf --autorotate All ellpack-vs-cusparse.eps" ) +os.system( "epstopdf --autorotate All sliced-ellpack-vs-cusparse.eps" ) +os.system( "epstopdf --autorotate All chunked-ellpack-vs-cusparse.eps" ) +os.system( "epstopdf --autorotate All bi-ellpack-vs-cusparse.eps" ) +os.system( "epstopdf --autorotate All ellpack-vs-ellpack-legacy.eps" ) +os.system( "epstopdf --autorotate All sliced-ellpack-vs-sliced-ellpack-legacy.eps" ) + +print( "Deleting temprary files..." ) +os.system( "rm cusparse.gplt" ) +os.system( "rm ellpack.gplt" ) +os.system( "rm sliced-ellpack.gplt" ) +os.system( "rm gnuplot.gplt" ) +os.system( "rm ellpack-vs-cusparse.eps" ) +os.system( "rm sliced-ellpack-vs-cusparse.eps" ) +os.system( "rm chunked-ellpack-vs-cusparse.eps" ) +os.system( "rm bi-ellpack-vs-cusparse.eps" ) +os.system( "rm ellpack-vs-ellpack-legacy.eps" ) +os.system( "rm sliced-ellpack-vs-sliced-ellpack-legacy.eps" ) diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index 1166245110501aa6c8c66c5a81448ab82895b54d..a73385eb1aab92d095406976c23d1b4bdb711728 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -193,6 +193,22 @@ class Array */ Array( Array&& array ); + + /** + * \brief Copy constructor from array with different template parameters. + * + * \tparam Value_ Value type of the input array. + * \tparam Device_ Device type of the input array. + * \tparam Index_ Index type of the input array. + * \tparam Allocator_ Allocator type of the input array. + * \param a the input array. + */ + template< typename Value_, + typename Device_, + typename Index_, + typename Allocator_ > + explicit Array( const Array< Value_, Device_, Index_, Allocator_ >& a ); + /** * \brief Constructor which initializes the array by copying elements from * \ref std::initializer_list, e.g. `{...}`. 
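The explicit converting constructor added to Array.h above is what lets the SpMV benchmark copy results between devices and element types in a single expression (e.g. building a host-side copy of a benchmark result vector in SpmvBenchmarkResult). A minimal usage sketch, hypothetical and not part of this patch, assuming only the TNL Array header; in a CUDA build the same constructor also covers Devices::Cuda to Devices::Host copies:

#include <TNL/Containers/Array.h>

using namespace TNL;

int main()
{
   Containers::Array< double, Devices::Host, int > source( 5 );
   source.setValue( 1.0 );

   // the new explicit constructor converts between value and index types
   // (and, with CUDA enabled, between devices) by delegating to operator=
   Containers::Array< float, Devices::Host, long > copy( source );

   return 0;
}

Because the constructor simply forwards to the templated assignment operator (defined in Array.hpp below), it inherits whatever cross-device copy semantics that assignment already provides.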
diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index ab81db7aabc214ab66751905446849179aa1975a..f45b7370de7fb3152df2facd377dbced37ef0466 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -132,6 +132,20 @@ Array( const Array< Value, Device, Index, Allocator >& array, Algorithms::MemoryOperations< Device >::copy( this->getData(), &array.getData()[ begin ], size ); } +template< typename Value, + typename Device, + typename Index, + typename Allocator > + template< typename Value_, + typename Device_, + typename Index_, + typename Allocator_ > +Array< Value, Device, Index, Allocator >:: +Array( const Array< Value_, Device_, Index_, Allocator_ >& a ) +{ + *this = a; +} + template< typename Value, typename Device, typename Index, diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp index 02be7f099f1fd9446200d8e10340c5a6bdc6afed..b94db8c88b5df8b14c719e099a7cc525682e6d45 100644 --- a/src/TNL/Containers/Segments/CSRView.hpp +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -28,6 +28,15 @@ CSRView() { } +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const OffsetsView& offsets_view ) + : offsets( offsets_view ) +{ +} + template< typename Device, typename Index > __cuda_callable__ diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.h b/src/TNL/Containers/Segments/ChunkedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..93580a9cdc7a877a259f84959754b7dd302cab29 --- /dev/null +++ b/src/TNL/Containers/Segments/ChunkedEllpack.h @@ -0,0 +1,163 @@ +/*************************************************************************** + ChunkedEllpack.h - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Allocators/Default.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/ChunkedEllpackView.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class ChunkedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder >; + template< typename Device_, typename Index_ > + using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >; + using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >; + //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >; + using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; + using ChunkedEllpackSliceInfoContainer = Containers::Array< 
ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; + + ChunkedEllpack() = default; + + ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + ChunkedEllpack( const ChunkedEllpack& segments ); + + ChunkedEllpack( const ChunkedEllpack&& segments ); + + static String getSerializationType(); + + static String getSegmentsType(); + + ViewType getView(); + + ConstViewType getConstView() const; + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + ChunkedEllpack& operator=( const ChunkedEllpack& source ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > + ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ); + + void save( File& file ) const; + + void load( File& file ); + + void printStructure( std::ostream& str ); // TODO const; + + protected: + + template< typename SegmentsSizes > + void resolveSliceSizes( SegmentsSizes& rowLengths ); + + template< typename SegmentsSizes > + bool setSlice( SegmentsSizes& rowLengths, + const IndexType sliceIdx, + IndexType& elementsToAllocation ); + + IndexType size = 0, storageSize = 0; + + IndexType chunksInSlice = 256, desiredChunkSize = 16; + + /** + * For each segment, this keeps index of the slice which contains the + * segment. + */ + OffsetsHolder rowToSliceMapping; + + /** + * For each row, this keeps index of the first chunk within a slice. + */ + OffsetsHolder rowToChunkMapping; + + OffsetsHolder chunksToSegmentsMapping; + + /** + * Keeps index of the first segment index. 
+ */ + OffsetsHolder rowPointers; + + ChunkedEllpackSliceInfoContainer slices; + + IndexType numberOfSlices; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > + friend class ChunkedEllpack; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include <TNL/Containers/Segments/ChunkedEllpack.hpp> diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.hpp b/src/TNL/Containers/Segments/ChunkedEllpack.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6d0cf6fe7ea1d517feac4edcd4071a22c28f84ae --- /dev/null +++ b/src/TNL/Containers/Segments/ChunkedEllpack.hpp @@ -0,0 +1,508 @@ +/*************************************************************************** + ChunkedEllpack.hpp - description + ------------------- + begin : Jan 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/ChunkedEllpack.h> +#include <TNL/Containers/Segments/Ellpack.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) + : size( 0 ), storageSize( 0 ), chunksInSlice( 0 ), desiredChunkSize( 0 ) +{ + this->setSegmentsSizes( sizes ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack( const ChunkedEllpack& chunkedEllpack ) + : size( chunkedEllpack.size ), + storageSize( chunkedEllpack.storageSize ), + chunksInSlice( chunkedEllpack.chunksInSlice ), + desiredChunkSize( chunkedEllpack.desiredChunkSize ), + rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), + rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), + chunksToSegmentsMapping( chunkedEllpack. chunksToSegmentsMapping ), + rowPointers( chunkedEllpack.rowPointers ), + slices( chunkedEllpack.slices ), + numberOfSlices( chunkedEllpack.numberOfSlices ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack ) + : size( chunkedEllpack.size ), + storageSize( chunkedEllpack.storageSize ), + chunksInSlice( chunkedEllpack.chunksInSlice ), + desiredChunkSize( chunkedEllpack.desiredChunkSize ), + rowToChunkMapping( chunkedEllpack.rowToChunkMapping ), + rowToSliceMapping( chunkedEllpack.rowTopSliceMapping ), + chunksToSegmentsMapping( chunkedEllpack. 
chunksToSegmentsMapping ), + rowPointers( chunkedEllpack.rowPointers ), + slices( chunkedEllpack.slices ), + numberOfSlices( chunkedEllpack.numberOfSlices ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +String +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSerializationType() +{ + return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +String +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ViewType +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getView() +{ + return ViewType( size, storageSize, chunksInSlice, desiredChunkSize, + rowToChunkMapping.getView(), + rowToSliceMapping.getView(), + chunksToSegmentsMapping.getView(), + rowPointers.getView(), + slices.getView(), + numberOfSlices ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ConstViewType +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getConstView() const +{ + return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize, + rowToChunkMapping.getConstView(), + rowToSliceMapping.getConstView(), + chunksToSegmentsMapping.getConstView(), + rowPointers.getConstView(), + slices.getConstView(), + numberOfSlices ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename SegmentsSizes > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +resolveSliceSizes( SegmentsSizes& segmentsSizes ) +{ + /**** + * Iterate over rows and allocate slices so that each slice has + * approximately the same number of allocated elements + */ + const IndexType desiredElementsInSlice = + this->chunksInSlice * this->desiredChunkSize; + + IndexType segmentIdx( 0 ), + sliceSize( 0 ), + allocatedElementsInSlice( 0 ); + numberOfSlices = 0; + while( segmentIdx < segmentsSizes.getSize() ) + { + /**** + * Add one row to the current slice until we reach the desired + * number of elements in a slice. + */ + allocatedElementsInSlice += segmentsSizes[ segmentIdx ]; + sliceSize++; + segmentIdx++; + if( allocatedElementsInSlice < desiredElementsInSlice ) + if( segmentIdx < segmentsSizes.getSize() && sliceSize < chunksInSlice ) continue; + TNL_ASSERT( sliceSize >0, ); + this->slices[ numberOfSlices ].size = sliceSize; + this->slices[ numberOfSlices ].firstSegment = segmentIdx - sliceSize; + this->slices[ numberOfSlices ].pointer = allocatedElementsInSlice; // this is only temporary + sliceSize = 0; + numberOfSlices++; + allocatedElementsInSlice = 0; + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename SegmentsSizes > +bool +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +setSlice( SegmentsSizes& rowLengths, + const IndexType sliceIndex, + IndexType& elementsToAllocation ) +{ + /**** + * Now, compute the number of chunks per each row. + * Each row get one chunk by default. + * Then each row will get additional chunks w.r. 
to the + * number of the elements in the row. If there are some + * free chunks left, repeat it again. + */ + const IndexType sliceSize = this->slices[ sliceIndex ].size; + const IndexType sliceBegin = this->slices[ sliceIndex ].firstSegment; + const IndexType allocatedElementsInSlice = this->slices[ sliceIndex ].pointer; + const IndexType sliceEnd = sliceBegin + sliceSize; + + IndexType freeChunks = this->chunksInSlice - sliceSize; + for( IndexType i = sliceBegin; i < sliceEnd; i++ ) + this->rowToChunkMapping.setElement( i, 1 ); + + int totalAddedChunks( 0 ); + int maxRowLength( rowLengths[ sliceBegin ] ); + for( IndexType i = sliceBegin; i < sliceEnd; i++ ) + { + double rowRatio( 0.0 ); + if( allocatedElementsInSlice != 0 ) + rowRatio = ( double ) rowLengths[ i ] / ( double ) allocatedElementsInSlice; + const IndexType addedChunks = freeChunks * rowRatio; + totalAddedChunks += addedChunks; + this->rowToChunkMapping[ i ] += addedChunks; + if( maxRowLength < rowLengths[ i ] ) + maxRowLength = rowLengths[ i ]; + } + freeChunks -= totalAddedChunks; + while( freeChunks ) + for( IndexType i = sliceBegin; i < sliceEnd && freeChunks; i++ ) + if( rowLengths[ i ] == maxRowLength ) + { + this->rowToChunkMapping[ i ]++; + freeChunks--; + } + + /**** + * Compute the chunk size + */ + IndexType maxChunkInSlice( 0 ); + for( IndexType i = sliceBegin; i < sliceEnd; i++ ) + { + TNL_ASSERT_NE( this->rowToChunkMapping[ i ], 0, "" ); + maxChunkInSlice = TNL::max( maxChunkInSlice, + roundUpDivision( rowLengths[ i ], this->rowToChunkMapping[ i ] ) ); + } + TNL_ASSERT_GT( maxChunkInSlice, 0, "" ); + + /**** + * Set-up the slice info. + */ + this->slices[ sliceIndex ].chunkSize = maxChunkInSlice; + this->slices[ sliceIndex ].pointer = elementsToAllocation; + elementsToAllocation += this->chunksInSlice * maxChunkInSlice; + + for( IndexType i = sliceBegin; i < sliceEnd; i++ ) + this->rowToSliceMapping[ i ] = sliceIndex; + + for( IndexType i = sliceBegin; i < sliceEnd; i++ ) + { + this->rowPointers[ i + 1 ] = maxChunkInSlice*rowToChunkMapping[ i ]; + TNL_ASSERT( this->rowPointers[ i ] >= 0, + std::cerr << "this->rowPointers[ i ] = " << this->rowPointers[ i ] ); + TNL_ASSERT( this->rowPointers[ i + 1 ] >= 0, + std::cerr << "this->rowPointers[ i + 1 ] = " << this->rowPointers[ i + 1 ] ); + } + + /**** + * Finish the row to chunk mapping by computing the prefix sum. 
+ */ + for( IndexType j = sliceBegin + 1; j < sliceEnd; j++ ) + rowToChunkMapping[ j ] += rowToChunkMapping[ j - 1 ]; + return true; +} + + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename SizesHolder > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +setSegmentsSizes( const SizesHolder& segmentsSizes ) +{ + if( std::is_same< DeviceType, Devices::Host >::value ) + { + this->size = segmentsSizes.getSize(); + this->slices.setSize( this->size ); + this->rowToChunkMapping.setSize( this->size ); + this->rowToSliceMapping.setSize( this->size ); + this->rowPointers.setSize( this->size + 1 ); + + this->resolveSliceSizes( segmentsSizes ); + this->rowPointers.setElement( 0, 0 ); + this->storageSize = 0; + for( IndexType sliceIndex = 0; sliceIndex < numberOfSlices; sliceIndex++ ) + this->setSlice( segmentsSizes, sliceIndex, storageSize ); + this->rowPointers.scan(); + IndexType chunksCount = this->numberOfSlices * this->chunksInSlice; + this->chunksToSegmentsMapping.setSize( chunksCount ); + IndexType chunkIdx( 0 ); + for( IndexType segmentIdx = 0; segmentIdx < this->size; segmentIdx++ ) + { + const IndexType& sliceIdx = rowToSliceMapping[ segmentIdx ]; + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIdx ].firstSegment ) + firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; + + const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + for( IndexType i = 0; i < segmentChunksCount; i++ ) + this->chunksToSegmentsMapping[ chunkIdx++ ] = segmentIdx; + } + } + else + { + ChunkedEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< Index >, RowMajorOrder > hostSegments; + Containers::Vector< IndexType, Devices::Host, IndexType > hostSegmentsSizes( segmentsSizes ); + hostSegments.setSegmentsSizes( hostSegmentsSizes ); + *this = hostSegments; + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSegmentsCount() const +{ + return this->segmentsCount; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +Index +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize( + rowToSliceMapping.getView(), + slices.getView(), + rowToChunkMapping.getView(), + segmentIdx ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSize() const +{ + return this->size; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getStorageSize() const +{ + return this->storageSize; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex( + rowToSliceMapping, + 
slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx, + localIdx ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +__cuda_callable__ +auto +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename Function, typename... Args > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + this->getConstView().forSegments( first, last, f, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename Function, typename... Args > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >& +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ) +{ + this->size = source.size; + this->storageSize = source.storageSize; + this->chunksInSlice = source.chunksInSlice; + this->desiredChunkSize = source.desiredChunkSize; + this->rowToChunkMapping = source.rowToChunkMapping; + this->rowToSliceMapping = source.rowToSliceMapping; + this->rowPointers = source.rowPointers; + this->chunksToSegmentsMapping = source.chunksToSegmentsMapping; + this->slices = source.slices; + this->numberOfSlices = source.numberOfSlices; + return *this; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +save( File& file ) const +{ + file.save( &this->size ); + file.save( &this->storageSize ); + file.save( &this->chunksInSlice ); + file.save( &this->desiredChunkSize ); + file << this->rowToChunkMapping + << this->rowToSliceMapping + << this->rowPointers + << this->chunksToSegmentsMapping + << this->slices; + file.save( this->numberOfSlices ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +load( File& file ) +{ + file.load( &this->size ); + file.load( &this->storageSize ); + file.load( &this->chunksInSlice ); + file.load( &this->desiredChunkSize ); + file >> this->rowToChunkMapping + >> this->rowToSliceMapping + >> this->chunksToSegmentsMapping + >> this->rowPointers + >> this->slices; + file.load( &this->numberOfSlices ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder > +void +ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +printStructure( std::ostream& str ) +{ + this->getView().printStructure( str ); +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h new file mode 100644 index 0000000000000000000000000000000000000000..9eba9dd6867fe023ba418d70ff2c616e5f1e1e3b --- /dev/null +++ b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h @@ -0,0 +1,94 @@ +/*************************************************************************** + ChunkedEllpackChunkedEllpackSegmentView.h - description + ------------------- + begin : Mar 24, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Index, + bool RowMajorOrder = false > +class ChunkedEllpackSegmentView; + +template< typename Index > +class ChunkedEllpackSegmentView< Index, false > +{ + public: + + using IndexType = Index; + + __cuda_callable__ + ChunkedEllpackSegmentView( const IndexType offset, + const IndexType size, + const IndexType chunkSize, // this is only for compatibility with the following specialization + const IndexType chunksInSlice ) // this one as well - both can be 
replaced when we could use constexprif in C++17 + : segmentOffset( offset ), segmentSize( size ){}; + + __cuda_callable__ + ChunkedEllpackSegmentView( const ChunkedEllpackSegmentView& view ) + : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIndex ) const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." ); + return segmentOffset + localIndex; + }; + + protected: + + IndexType segmentOffset, segmentSize; +}; + +template< typename Index > +class ChunkedEllpackSegmentView< Index, true > +{ + public: + + using IndexType = Index; + + __cuda_callable__ + ChunkedEllpackSegmentView( const IndexType offset, + const IndexType size, + const IndexType chunkSize, + const IndexType chunksInSlice ) + : segmentOffset( offset ), segmentSize( size ), + chunkSize( chunkSize ), chunksInSlice( chunksInSlice ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIdx ) const + { + TNL_ASSERT_LT( localIdx, segmentSize, "Local index exceeds segment bounds." ); + const IndexType chunkIdx = localIdx / chunkSize; + const IndexType inChunkOffset = localIdx % chunkSize; + return segmentOffset + inChunkOffset * chunksInSlice + chunkIdx; + }; + + protected: + + IndexType segmentOffset, segmentSize, chunkSize, chunksInSlice; +}; + + } //namespace Segments + } //namespace Containers +} //namespace TNL diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.h b/src/TNL/Containers/Segments/ChunkedEllpackView.h new file mode 100644 index 0000000000000000000000000000000000000000..4b444d08473a2763c2f50a7af5bf8fc70b5fef0e --- /dev/null +++ b/src/TNL/Containers/Segments/ChunkedEllpackView.h @@ -0,0 +1,215 @@ +/*************************************************************************** + ChunkedEllpackView.h - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/ChunkedEllpackSegmentView.h> +#include <TNL/Containers/Segments/details/ChunkedEllpack.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class ChunkedEllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >; + using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType; + using ViewType = ChunkedEllpackView; + template< typename Device_, typename Index_ > + using ViewTemplate = ChunkedEllpackView< Device_, Index_ >; + using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index > >; + using SegmentViewType = ChunkedEllpackSegmentView< IndexType >; + using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >; + using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< 
ChunkedEllpackSliceInfoType >; + using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; + using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType; + + __cuda_callable__ + ChunkedEllpackView() = default; + + __cuda_callable__ + ChunkedEllpackView( const IndexType size, + const IndexType storageSize, + const IndexType chunksInSlice, + const IndexType desiredChunkSize, + const OffsetsView& rowToChunkMapping, + const OffsetsView& rowToSliceMapping, + const OffsetsView& chunksToSegmentsMapping, + const OffsetsView& rowPointers, + const ChunkedEllpackSliceInfoContainerView& slices, + const IndexType numberOfSlices ); + + __cuda_callable__ + ChunkedEllpackView( const IndexType size, + const IndexType storageSize, + const IndexType chunksInSlice, + const IndexType desiredChunkSize, + const OffsetsView&& rowToChunkMapping, + const OffsetsView&& rowToSliceMapping, + const OffsetsView&& chunksToSegmentsMapping, + const OffsetsView&& rowPointers, + const ChunkedEllpackSliceInfoContainerView&& slices, + const IndexType numberOfSlices ); + + __cuda_callable__ + ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view ); + + __cuda_callable__ + ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view ); + + static String getSerializationType(); + + static String getSegmentsType(); + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + /** + * \brief Number segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + /*** + * \brief Returns size of the segment number \r segmentIdx + */ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that needs to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; + + ChunkedEllpackView& operator=( const ChunkedEllpackView& view ); + + void save( File& file ) const; + + void load( File& file ); + + void printStructure( std::ostream& str ) const; + + protected: + +#ifdef HAVE_CUDA + template< typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > + __device__ + void segmentsReductionKernel( IndexType gridIdx, + IndexType first, + IndexType last, + Fetch fetch, + Reduction reduction, + ResultKeeper keeper, + Real zero, + Args... args ) const; +#endif + + IndexType size = 0, storageSize = 0, numberOfSlices = 0; + + IndexType chunksInSlice = 256, desiredChunkSize = 16; + + /** + * For each segment, this keeps index of the slice which contains the + * segment. + */ + OffsetsView rowToSliceMapping; + + /** + * For each row, this keeps index of the first chunk within a slice. + */ + OffsetsView rowToChunkMapping; + + OffsetsView chunksToSegmentsMapping; + + /** + * Keeps index of the first segment index. + */ + OffsetsView rowPointers; + + ChunkedEllpackSliceInfoContainerView slices; + +#ifdef HAVE_CUDA + template< typename View_, + typename Index_, + typename Fetch_, + typename Reduction_, + typename ResultKeeper_, + typename Real_, + typename... Args_ > + friend __global__ + void ChunkedEllpackSegmentsReductionKernel( View_ chunkedEllpack, + Index_ gridIdx, + Index_ first, + Index_ last, + Fetch_ fetch, + Reduction_ reduction, + ResultKeeper_ keeper, + Real_ zero, + Args_... args ); +#endif +}; + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include <TNL/Containers/Segments/ChunkedEllpackView.hpp> diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.hpp b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b3b151624f500282451ff0ea4a643c9011790b6c --- /dev/null +++ b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp @@ -0,0 +1,654 @@ +/*************************************************************************** + ChunkedEllpackView.hpp - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/ChunkedEllpackView.h> +//#include <TNL/Containers/Segments/details/ChunkedEllpack.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +#ifdef HAVE_CUDA +template< typename View, + typename Index, + typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > +__global__ +void ChunkedEllpackSegmentsReductionKernel( View chunkedEllpack, + Index gridIdx, + Index first, + Index last, + Fetch fetch, + Reduction reduction, + ResultKeeper keeper, + Real zero, + Args... args ) +{ + chunkedEllpack.segmentsReductionKernel( gridIdx, first, last, fetch, reduction, keeper, zero, args... 
); +} +#endif + + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView( const IndexType size, + const IndexType storageSize, + const IndexType chunksInSlice, + const IndexType desiredChunkSize, + const OffsetsView& rowToChunkMapping, + const OffsetsView& rowToSliceMapping, + const OffsetsView& chunksToSegmentsMapping, + const OffsetsView& rowPointers, + const ChunkedEllpackSliceInfoContainerView& slices, + const IndexType numberOfSlices ) +: size( size ), + storageSize( storageSize ), + numberOfSlices( numberOfSlices ), + chunksInSlice( chunksInSlice ), + desiredChunkSize( desiredChunkSize ), + rowToSliceMapping( rowToSliceMapping ), + rowToChunkMapping( rowToChunkMapping ), + chunksToSegmentsMapping( chunksToSegmentsMapping ), + rowPointers( rowPointers ), + slices( slices ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView( const IndexType size, + const IndexType storageSize, + const IndexType chunksInSlice, + const IndexType desiredChunkSize, + const OffsetsView&& rowToChunkMapping, + const OffsetsView&& rowToSliceMapping, + const OffsetsView&& chunksToSegmentsMapping, + const OffsetsView&& rowPointers, + const ChunkedEllpackSliceInfoContainerView&& slices, + const IndexType numberOfSlices ) +: size( size ), + storageSize( storageSize ), + numberOfSlices( numberOfSlices ), + chunksInSlice( chunksInSlice ), + desiredChunkSize( desiredChunkSize ), + rowToSliceMapping( std::move( rowToSliceMapping ) ), + rowToChunkMapping( std::move( rowToChunkMapping ) ), + chunksToSegmentsMapping( std::move( chunksToSegmentsMapping ) ), + rowPointers( std::move( rowPointers ) ), + slices( std::move( slices ) ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view ) +: size( chunked_ellpack_view.size ), + storageSize( chunked_ellpack_view.storageSize ), + numberOfSlices( chunked_ellpack_view.numberOfSlices ), + chunksInSlice( chunked_ellpack_view.chunksInSlice ), + desiredChunkSize( chunked_ellpack_view.desiredChunkSize ), + rowToSliceMapping( chunked_ellpack_view.rowToSliceMapping ), + rowToChunkMapping( chunked_ellpack_view.rowToChunkMapping ), + chunksToSegmentsMapping( chunked_ellpack_view.chunksToSegmentsMapping ), + rowPointers( chunked_ellpack_view.rowPointers ), + slices( chunked_ellpack_view.slices ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view ) +: size( chunked_ellpack_view.size ), + storageSize( chunked_ellpack_view.storageSize ), + numberOfSlices( chunked_ellpack_view.numberOfSlices ), + chunksInSlice( chunked_ellpack_view.chunksInSlice ), + desiredChunkSize( chunked_ellpack_view.desiredChunkSize ), + rowToSliceMapping( std::move( chunked_ellpack_view.rowToSliceMapping ) ), + rowToChunkMapping( std::move( chunked_ellpack_view.rowToChunkMapping ) ), + chunksToSegmentsMapping( std::move( chunked_ellpack_view.chunksToSegmentsMapping ) ), + rowPointers( std::move( chunked_ellpack_view.rowPointers ) ), + slices( std::move( chunked_ellpack_view.slices ) ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +String 
+ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +String +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSegmentsType() +{ + return "ChunkedEllpack"; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +typename ChunkedEllpackView< Device, Index, RowMajorOrder >::ViewType +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getView() +{ + return ViewType( size, chunksInSlice, desiredChunkSize, + rowToChunkMapping.getView(), + rowToSliceMapping.getView(), + chunksToSegmentsMapping.getView(), + rowPointers.getView(), + slices.getView(), + numberOfSlices ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +typename ChunkedEllpackView< Device, Index, RowMajorOrder >::ConstViewType +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getConstView() const +{ + return ConstViewType( size, chunksInSlice, desiredChunkSize, + rowToChunkMapping.getConstView(), + rowToSliceMapping.getConstView(), + chunksToSegmentsMapping.getConstView(), + rowPointers.getConstView(), + slices.getConstView(), + numberOfSlices ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSegmentsCount() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + if( std::is_same< DeviceType, Devices::Host >::value ) + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + segmentIdx ); + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { +#ifdef __CUDA_ARCH__ + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + segmentIdx ); +#else + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize( + rowToSliceMapping, + slices, + rowToChunkMapping, + segmentIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSize() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getStorageSize() const +{ + return this->storageSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( std::is_same< DeviceType, Devices::Host >::value ) + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx, + localIdx ); + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { +#ifdef __CUDA_ARCH__ + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx, + localIdx ); +#else + return 
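+         // Falling back to the non-Direct variant here: when this code runs on
+         // the host while the data live on the GPU, getElement() fetches the
+         // required entries one at a time instead of dereferencing device memory.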
details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx, + localIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ + if( std::is_same< DeviceType, Devices::Host >::value ) + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx ); + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { +#ifdef __CUDA_ARCH__ + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx ); +#else + return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentView( + rowToSliceMapping, + slices, + rowToChunkMapping, + chunksInSlice, + segmentIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function, typename... Args > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const IndexType chunksInSlice = this->chunksInSlice; + auto rowToChunkMapping = this->rowToChunkMapping; + auto rowToSliceMapping = this->rowToSliceMapping; + auto slices = this->slices; + auto work = [=] __cuda_callable__ ( IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = rowToSliceMapping[ segmentIdx ]; + + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIdx ].firstSegment ) + { + firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; + } + + const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices[ sliceIdx ].pointer; + const IndexType chunkSize = slices[ sliceIdx ].chunkSize; + + const IndexType segmentSize = segmentChunksCount * chunkSize; + bool compute( true ); + if( RowMajorOrder ) + { + IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize; + IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType j = begin; j < end && compute; j++ ) + f( segmentIdx, localIdx++, j, compute, args...); + } + else + { + IndexType localIdx( 0 ); + for( IndexType chunkIdx = 0; chunkIdx < segmentChunksCount; chunkIdx++ ) + { + IndexType begin = sliceOffset + firstChunkOfSegment + chunkIdx; + IndexType end = begin + chunksInSlice * chunkSize; + for( IndexType j = begin; j < end && compute; j += chunksInSlice ) + { + f( segmentIdx, localIdx++, j, compute, args...); + } + } + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last , work, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function, typename... Args > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + if( std::is_same< DeviceType, Devices::Host >::value ) + { + //segmentsReductionKernel( 0, first, last, fetch, reduction, keeper, zero, args... ); + //return; + + for( IndexType segmentIdx = first; segmentIdx < last; segmentIdx++ ) + { + const IndexType& sliceIndex = rowToSliceMapping[ segmentIdx ]; + TNL_ASSERT_LE( sliceIndex, this->size, "" ); + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIndex ].firstSegment ) + firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; + + const IndexType lastChunkOfSegment = rowToChunkMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices[ sliceIndex ].pointer; + const IndexType chunkSize = slices[ sliceIndex ].chunkSize; + + const IndexType segmentSize = segmentChunksCount * chunkSize; + RealType aux( zero ); + bool compute( true ); + if( RowMajorOrder ) + { + IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize; + IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( segmentIdx, localIdx++, j, compute, args...) ); + } + else + { + for( IndexType chunkIdx = 0; chunkIdx < segmentChunksCount; chunkIdx++ ) + { + IndexType begin = sliceOffset + firstChunkOfSegment + chunkIdx; + IndexType end = begin + chunksInSlice * chunkSize; + IndexType localIdx( 0 ); + for( IndexType j = begin; j < end && compute; j += chunksInSlice ) + reduction( aux, fetch( segmentIdx, localIdx++, j, compute, args...) ); + } + } + keeper( segmentIdx, aux ); + } + } + if( std::is_same< DeviceType, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + //const IndexType chunksCount = this->numberOfSlices * this->chunksInSlice; + // TODO: This ignores parameters first and last + const IndexType cudaBlocks = this->numberOfSlices; + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + dim3 cudaBlockSize( this->chunksInSlice ), cudaGridSize; + const IndexType sharedMemory = cudaBlockSize.x * sizeof( RealType ); + + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + ChunkedEllpackSegmentsReductionKernel< ViewType, IndexType, Fetch, Reduction, ResultKeeper, Real, Args... > + <<< cudaGridSize, cudaBlockSize, sharedMemory >>> + ( *this, gridIdx, first, last, fetch, reduction, keeper, zero, args... ); + } +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +ChunkedEllpackView< Device, Index, RowMajorOrder >& +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +operator=( const ChunkedEllpackView& view ) +{ + this->size = view.size; + this->storageSize = view.storageSize; + this->chunksInSlice = view.chunksInSlice; + this->desiredChunkSize = view.desiredChunkSize; + this->rowToChunkMapping.bind( view.rowToChunkMapping ); + this->chunksToSegmentsMapping.bind( view.chunksToSegmentsMapping ); + this->rowToSliceMapping.bind( view.rowToSliceMapping ); + this->rowPointers.bind( view.rowPointers ); + this->slices.bind( view.slices ); + this->numberOfSlices = view.numberOfSlices; + return *this; +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +save( File& file ) const +{ + file.save( &this->size ); + file.save( &this->storageSize ); + file.save( &this->chunksInSlice ); + file.save( &this->desiredChunkSize ); + file << this->rowToChunkMapping + << this->chunksToSegmentsMapping + << this->rowToSliceMapping + << this->rowPointers + << this->slices; + file.save( &this->numberOfSlices ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +load( File& file ) +{ + file.load( &this->size ); + file.load( &this->storageSize ); + file.load( &this->chunksInSlice ); + file.load( &this->desiredChunkSize ); + file >> this->rowToChunkMapping + >> this->chunksToSegmentsMapping + >> this->rowToSliceMapping + >> this->rowPointers + >> this->slices; + file.load( &this->numberOfSlices ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder > +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +printStructure( std::ostream& str ) const +{ + //const IndexType numberOfSlices = this->getNumberOfSlices(); + str << "Segments count: " << this->getSize() << std::endl + << "Slices: " << numberOfSlices << std::endl; + for( IndexType i = 0; i < numberOfSlices; i++ ) + str << " Slice " << i + << " : size = " << this->slices.getElement( i ).size + << " chunkSize = " << this->slices.getElement( i ).chunkSize + << " firstSegment = " << this->slices.getElement( i ).firstSegment + << " pointer = " << this->slices.getElement( i ).pointer << std::endl; + for( IndexType i = 0; i < this->getSize(); i++ ) + str << "Segment " << i + << " : slice = " << this->rowToSliceMapping.getElement( i ) + << " chunk = " << this->rowToChunkMapping.getElement( i ) << std::endl; +} + +#ifdef HAVE_CUDA +template< typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real, + typename... Args > +__device__ +void +ChunkedEllpackView< Device, Index, RowMajorOrder >:: +segmentsReductionKernel( IndexType gridIdx, + IndexType first, + IndexType last, + Fetch fetch, + Reduction reduction, + ResultKeeper keeper, + Real zero, + Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... 
) ); + + const IndexType firstSlice = rowToChunkMapping[ first ] / chunksInSlice; + const IndexType lastSlice = rowToChunkMapping[ last - 1 ] / chunksInSlice; + //for( IndexType sliceIdx = firstSlice; sliceIdx < lastSlice; sliceIdx++ ) + { + const IndexType sliceIdx = gridIdx * Cuda::getMaxGridSize() + blockIdx.x; + if( sliceIdx >= lastSlice ) + return; + + RealType* chunksResults = Cuda::getSharedMemory< RealType >(); + //for( IndexType threadIdx = 0; threadIdx < 256; threadIdx++ ) + //{ + __shared__ details::ChunkedEllpackSliceInfo< IndexType > sliceInfo; + if( threadIdx.x == 0 ) + sliceInfo = this->slices[ sliceIdx ]; + chunksResults[ threadIdx.x ] = zero; + __syncthreads(); + + + + const IndexType sliceOffset = sliceInfo.pointer; + const IndexType chunkSize = sliceInfo.chunkSize; + const IndexType chunkIdx = sliceIdx * chunksInSlice + threadIdx.x; + const IndexType segmentIdx = this->chunksToSegmentsMapping[ chunkIdx ]; + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != sliceInfo.firstSegment ) + firstChunkOfSegment = rowToChunkMapping[ segmentIdx - 1 ]; + IndexType localIdx = ( threadIdx.x - firstChunkOfSegment ) * chunkSize; + bool compute( true ); + + if( RowMajorOrder ) + { + IndexType begin = sliceOffset + threadIdx.x * chunkSize; // threadIdx.x = chunkIdx within the slice + IndexType end = begin + chunkSize; + for( IndexType j = begin; j < end && compute; j++ ) + reduction( chunksResults[ threadIdx.x ], fetch( segmentIdx, localIdx++, j, compute, args...) ); + } + else + { + const IndexType begin = sliceOffset + threadIdx.x; // threadIdx.x = chunkIdx within the slice + const IndexType end = begin + chunksInSlice * chunkSize; + for( IndexType j = begin; j < end && compute; j += chunksInSlice ) + reduction( chunksResults[ threadIdx.x ], fetch( segmentIdx, localIdx++, j, compute, args...) 
); + } + __syncthreads(); + //} + + //details::ChunkedEllpackSliceInfo< IndexType > sliceInfo; + //for( IndexType threadIdx = 0; threadIdx < 256; threadIdx++ ) + //{ + //if( threadIdx == 0 ) + // sliceInfo = this->slices[ sliceIdx ]; + if( threadIdx.x < sliceInfo.size ) + { + const IndexType row = sliceInfo.firstSegment + threadIdx.x; + IndexType chunkIndex( 0 ); + if( threadIdx.x != 0 ) + chunkIndex = this->rowToChunkMapping[ row - 1 ]; + const IndexType lastChunk = this->rowToChunkMapping[ row ]; + RealType result( zero ); + while( chunkIndex < lastChunk ) + reduction( result, chunksResults[ chunkIndex++ ] ); + keeper( row, result ); + } + //} // threadIdx + } // sliceIdx +} +#endif + + } // namespace Segments + } // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Segments/details/ChunkedEllpack.h b/src/TNL/Containers/Segments/details/ChunkedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..8807de226cf2d7648a95cc2d788b27a5775767ac --- /dev/null +++ b/src/TNL/Containers/Segments/details/ChunkedEllpack.h @@ -0,0 +1,229 @@ +/*************************************************************************** + ChunkedEllpack.h - description + ------------------- + begin : Mar 25, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/ChunkedEllpackSegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + namespace details { + +/*** + * In the ChunkedEllpack, the segments are split into slices. This is done + * in ChunkedEllpack::resolveSliceSizes. All segments elements in each slice + * are split into chunks. All chunks in one slice have the same size, but the size + * of chunks can be different in each slice. + */ +template< typename Index > +struct ChunkedEllpackSliceInfo +{ + /** + * The size of the slice, it means the number of the segments covered by + * the slice. + */ + Index size; + + /** + * The chunk size, i.e. maximal number of non-zero elements that can be stored + * in the chunk. + */ + Index chunkSize; + + /** + * Index of the first segment covered be this slice. + */ + Index firstSegment; + + /** + * Position of the first element of this slice. 
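+    * It serves as the base offset (the sliceOffset) when global element
+    * indices are computed for the segments of this slice.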
+ */ + Index pointer; +}; + + +template< typename Index, + typename Device, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class ChunkedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using OffsetsHolderView = typename OffsetsHolder::ViewType; + using SegmentsSizes = OffsetsHolder; + using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >; + using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; + using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; + using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType; + using SegmentViewType = ChunkedEllpackSegmentView< IndexType >; + + __cuda_callable__ static + IndexType getSegmentSizeDirect( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType segmentIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping[ segmentIdx ]; + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIndex ].firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping[ segmentIdx - 1 ]; + + const IndexType lastChunkOfSegment = segmentsToChunksMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType chunkSize = slices[ sliceIndex ].chunkSize; + return chunkSize * segmentChunksCount; + } + + static + IndexType getSegmentSize( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType segmentIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping.getElement( segmentIdx ); + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices.getElement( sliceIndex ).firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx - 1 ); + + const IndexType lastChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx ); + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType chunkSize = slices.getElement( sliceIndex ).chunkSize; + return chunkSize * segmentChunksCount; + } + + __cuda_callable__ static + IndexType getGlobalIndexDirect( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType chunksInSlice, + const IndexType segmentIdx, + const IndexType localIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping[ segmentIdx ]; + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIndex ].firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping[ segmentIdx - 1 ]; + + const IndexType lastChunkOfSegment = segmentsToChunksMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices[ sliceIndex ].pointer; + const IndexType chunkSize = slices[ sliceIndex ].chunkSize; + TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); + + if( RowMajorOrder ) + return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; + 
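+      // Column-major layout: the elements of one chunk are interleaved across
+      // the slice, so two consecutive local indices within a chunk are
+      // chunksInSlice entries apart. The local index is split below into a
+      // chunk index and an offset inside that chunk before being mapped back
+      // to the storage.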
else + { + const IndexType inChunkOffset = localIdx % chunkSize; + const IndexType chunkIdx = localIdx / chunkSize; + return sliceOffset + inChunkOffset * chunksInSlice + firstChunkOfSegment + chunkIdx; + } + } + + static + IndexType getGlobalIndex( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType chunksInSlice, + const IndexType segmentIdx, + const IndexType localIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping.getElement( segmentIdx ); + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices.getElement( sliceIndex ).firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx - 1 ); + + const IndexType lastChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx ); + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices.getElement( sliceIndex ).pointer; + const IndexType chunkSize = slices.getElement( sliceIndex ).chunkSize; + TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); + + if( RowMajorOrder ) + return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; + else + { + const IndexType inChunkOffset = localIdx % chunkSize; + const IndexType chunkIdx = localIdx / chunkSize; + return sliceOffset + inChunkOffset * chunksInSlice + firstChunkOfSegment + chunkIdx; + } + } + + static __cuda_callable__ + SegmentViewType getSegmentViewDirect( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType chunksInSlice, + const IndexType segmentIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping[ segmentIdx ]; + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices[ sliceIndex ].firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping[ segmentIdx - 1 ]; + + const IndexType lastChunkOfSegment = segmentsToChunksMapping[ segmentIdx ]; + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices[ sliceIndex ].pointer; + const IndexType chunkSize = slices[ sliceIndex ].chunkSize; + const IndexType segmentSize = segmentChunksCount * chunkSize; + + if( RowMajorOrder ) + return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize, + segmentSize, + chunkSize, + chunksInSlice ); + else + return SegmentViewType( sliceOffset + firstChunkOfSegment, + segmentSize, + chunkSize, + chunksInSlice ); + } + + static __cuda_callable__ + SegmentViewType getSegmentView( const OffsetsHolderView& segmentsToSlicesMapping, + const ChunkedEllpackSliceInfoContainerView& slices, + const OffsetsHolderView& segmentsToChunksMapping, + const IndexType chunksInSlice, + const IndexType segmentIdx ) + { + const IndexType& sliceIndex = segmentsToSlicesMapping.getElement( segmentIdx ); + IndexType firstChunkOfSegment( 0 ); + if( segmentIdx != slices.getElement( sliceIndex ).firstSegment ) + firstChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx - 1 ); + + const IndexType lastChunkOfSegment = segmentsToChunksMapping.getElement( segmentIdx ); + const IndexType segmentChunksCount = lastChunkOfSegment - firstChunkOfSegment; + const IndexType sliceOffset = slices.getElement( sliceIndex ).pointer; + const IndexType chunkSize = slices.getElement( sliceIndex ).chunkSize; + const IndexType segmentSize = segmentChunksCount * chunkSize; + + if( RowMajorOrder ) + 
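+         // Row-major layout: the segment occupies one contiguous block starting
+         // at sliceOffset + firstChunkOfSegment * chunkSize. chunkSize and
+         // chunksInSlice are still passed so that both segment view
+         // specializations can be constructed with the same argument list.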
return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize, + segmentSize, + chunkSize, + chunksInSlice ); + else + return SegmentViewType( sliceOffset + firstChunkOfSegment, + segmentSize, + chunkSize, + chunksInSlice ); + } +}; + } //namespace details + } //namespace Segments + } //namespace Containers +} //namepsace TNL diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h deleted file mode 100644 index ecfe63107325793717482b3710c9533a153c34c1..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/Segments/details/Ellpack.h +++ /dev/null @@ -1,105 +0,0 @@ -/*************************************************************************** - Ellpack.h - description - ------------------- - begin : Dec 3, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Containers/Vector.h> - -namespace TNL { - namespace Containers { - namespace Segments { - -template< typename Device, - typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, - int Alignment = 32 > -class Ellpack -{ - public: - - using DeviceType = Device; - using IndexType = Index; - static constexpr int getAlignment() { return Alignment; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; - using SegmentsSizes = OffsetsHolder; - - Ellpack(); - - Ellpack( const SegmentsSizes& sizes ); - - Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); - - Ellpack( const Ellpack& segments ); - - Ellpack( const Ellpack&& segments ); - - /** - * \brief Set sizes of particular segments. - */ - template< typename SizesHolder = OffsetsHolder > - void setSegmentsSizes( const SizesHolder& sizes ); - - void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); - /** - * \brief Number segments. - */ - __cuda_callable__ - IndexType getSegmentsCount() const; - - __cuda_callable__ - IndexType getSegmentSize( const IndexType segmentIdx ) const; - - __cuda_callable__ - IndexType getSize() const; - - __cuda_callable__ - IndexType getStorageSize() const; - - __cuda_callable__ - IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - - /*** - * \brief Go over all segments and for each segment element call - * function 'f' with arguments 'args'. The return type of 'f' is bool. - * When its true, the for-loop continues. Once 'f' returns false, the for-loop - * is terminated. - */ - template< typename Function, typename... Args > - void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; - - template< typename Function, typename... Args > - void forAll( Function& f, Args... args ) const; - - - /*** - * \brief Go over all segments and perform a reduction in each of them. - */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > - void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; - - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > - void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; - - void save( File& file ) const; - - void load( File& file ); - - protected: - - IndexType segmentSize, size, alignedSize; -}; - - } // namespace Segements - } // namespace Conatiners -} // namespace TNL diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h deleted file mode 100644 index 6f185bc469e1c1826348b5662735d6a2992fc087..0000000000000000000000000000000000000000 --- a/src/TNL/Containers/Segments/details/SlicedEllpack.h +++ /dev/null @@ -1,104 +0,0 @@ -/*************************************************************************** - SlicedEllpack.h - description - ------------------- - begin : Dec 4, 2019 - copyright : (C) 2019 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Containers/Vector.h> - -namespace TNL { - namespace Containers { - namespace Segments { - -template< typename Device, - typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, - int SliceSize = 32 > -class SlicedEllpack -{ - public: - - using DeviceType = Device; - using IndexType = Index; - using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; - static constexpr int getSliceSize() { return SliceSize; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } - - SlicedEllpack(); - - SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); - - SlicedEllpack( const SlicedEllpack& segments ); - - SlicedEllpack( const SlicedEllpack&& segments ); - - /** - * \brief Set sizes of particular segments. - */ - template< typename SizesHolder = OffsetsHolder > - void setSegmentsSizes( const SizesHolder& sizes ); - - __cuda_callable__ - IndexType getSegmentsCount() const; - - __cuda_callable__ - IndexType getSegmentSize( const IndexType segmentIdx ) const; - - /** - * \brief Number segments. - */ - __cuda_callable__ - IndexType getSize() const; - - - __cuda_callable__ - IndexType getStorageSize() const; - - __cuda_callable__ - IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; - - __cuda_callable__ - void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; - - /*** - * \brief Go over all segments and for each segment element call - * function 'f' with arguments 'args'. The return type of 'f' is bool. - * When its true, the for-loop continues. Once 'f' returns false, the for-loop - * is terminated. - */ - template< typename Function, typename... Args > - void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; - - template< typename Function, typename... Args > - void forAll( Function& f, Args... args ) const; - - - /*** - * \brief Go over all segments and perform a reduction in each of them. - */ - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > - void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; - - template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > - void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const; - - void save( File& file ) const; - - void load( File& file ); - - protected: - - IndexType size, alignedSize, segmentsCount; - - OffsetsHolder sliceOffsets, sliceSegmentSizes; -}; - - } // namespace Segements - } // namespace Conatiners -} // namespace TNL diff --git a/src/TNL/Containers/StaticArray.h b/src/TNL/Containers/StaticArray.h index 51ee055066fab43d3eaca7a53e5a1bc1bee2abb0..5702d9fe7375df94177f67d1e65cdaf3e65ffa4b 100644 --- a/src/TNL/Containers/StaticArray.h +++ b/src/TNL/Containers/StaticArray.h @@ -227,7 +227,6 @@ public: /** * \brief Sets all values of this static array to \e val. */ - [[deprecated( "Use of StaticArray::setValue is deprecated, assignment operator=() should be used instead." )]] __cuda_callable__ void setValue( const ValueType& val ); diff --git a/src/TNL/Containers/Vector.h b/src/TNL/Containers/Vector.h index 259081052c1d0c1241f190cdad4002f98b3bf2d0..685a7f2068e665c7767b03350101367fa06ee123 100644 --- a/src/TNL/Containers/Vector.h +++ b/src/TNL/Containers/Vector.h @@ -112,6 +112,16 @@ public: */ Vector( Vector&& ) = default; + /** + * \brief Constructor from expression template + * + * @param expression input expression template + */ + template< typename VectorExpression, + typename..., + typename = std::enable_if_t< Expressions::HasEnabledExpressionTemplates< VectorExpression >::value && ! IsArrayType< VectorExpression >::value > > + explicit Vector( const VectorExpression& expression ); + /** * \brief Copy-assignment operator for copying data from another vector. */ diff --git a/src/TNL/Containers/Vector.hpp b/src/TNL/Containers/Vector.hpp index 5fdce0d09d2adb53b7c19e971fdf3b0a545891a5..b25ccbb5ac5ef117d17c024de38f5549d77f0b9e 100644 --- a/src/TNL/Containers/Vector.hpp +++ b/src/TNL/Containers/Vector.hpp @@ -27,6 +27,20 @@ Vector( const Vector& vector, { } +template< typename Real, + typename Device, + typename Index, + typename Allocator > + template< typename VectorExpression, + typename..., + typename > +Vector< Real, Device, Index, Allocator >:: +Vector( const VectorExpression& expression ) +{ + detail::VectorAssignment< Vector, VectorExpression >::resize( *this, expression ); + detail::VectorAssignment< Vector, VectorExpression >::assign( *this, expression ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Devices/AnyDevice.h b/src/TNL/Devices/AnyDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..8c82847293aa14cb18334bf4d3d3e3f1ab95d2b6 --- /dev/null +++ b/src/TNL/Devices/AnyDevice.h @@ -0,0 +1,35 @@ +/*************************************************************************** + AnyDevice.h - description + ------------------- + begin : Mar 17, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Devices/Host.h> + +namespace TNL { +namespace Devices { + +class AnyDevice +{ +}; + +template< typename Device > +struct PickDevice +{ + using DeviceType = Device; +}; + +template<> +struct PickDevice< Devices::AnyDevice > +{ + using DeviceType = Devices::Host; +}; + +} // namespace Devices +} // namespace TNL diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index ada48ee0297438c717772433fb6a09972f2d49e8..6a4795a7e748a26f32536f912f03886e3305bd9a 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -57,6 +57,8 @@ class Dense : public Matrix< Real, Device, Index > 
Dense( const IndexType rows, const IndexType columns ); + Dense( std::initializer_list< std::initializer_list< RealType > > data ); + ViewType getView(); ConstViewType getConstView() const; @@ -71,7 +73,16 @@ class Dense : public Matrix< Real, Device, Index > template< typename Matrix > void setLike( const Matrix& matrix ); - /**** + /** + * \brief This method creates dense matrix from 2D initializer list. + * + * The matrix dimensions will be adjusted by the input data. + * + * @param data + */ + void setElements( std::initializer_list< std::initializer_list< RealType > > data ); + + /** * This method is only for the compatibility with the sparse matrices. */ void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); diff --git a/src/TNL/Matrices/Dense.hpp b/src/TNL/Matrices/Dense.hpp index 346c26ed8a628d5737e3dce143340a42364e5a9a..28f152444aeeedd8625055a59445e82530c7d7fb 100644 --- a/src/TNL/Matrices/Dense.hpp +++ b/src/TNL/Matrices/Dense.hpp @@ -37,6 +37,57 @@ Dense( const IndexType rows, const IndexType columns ) this->setDimensions( rows, columns ); } +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Dense( std::initializer_list< std::initializer_list< RealType > > data ) +{ + this->setElements( data ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElements( std::initializer_list< std::initializer_list< RealType > > data ) +{ + IndexType rows = data.size(); + IndexType columns = 0; + for( auto row : data ) + columns = max( columns, row.size() ); + this->setDimensions( rows, columns ); + if( ! 
std::is_same< DeviceType, Devices::Host >::value ) + { + Dense< RealType, Devices::Host, IndexType > hostDense( rows, columns ); + IndexType rowIdx( 0 ); + for( auto row : data ) + { + IndexType columnIdx( 0 ); + for( auto element : row ) + hostDense.setElement( rowIdx, columnIdx++, element ); + rowIdx++; + } + *this = hostDense; + } + else + { + IndexType rowIdx( 0 ); + for( auto row : data ) + { + IndexType columnIdx( 0 ); + for( auto element : row ) + this->setElement( rowIdx, columnIdx++, element ); + rowIdx++; + } + } +} + template< typename Real, typename Device, typename Index, @@ -48,8 +99,7 @@ getView() -> ViewType { return ViewType( this->getRows(), this->getColumns(), - this->getValues().getView(), - this->segments.getView() ); + this->getValues().getView() ); } template< typename Real, @@ -63,8 +113,7 @@ getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), this->getColumns(), - this->getValues().getConstView(), - this->segments.getConstView() ); + this->getValues().getConstView() ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 95a7c47698fc27f7fa760a64c0176a147ebe391c..a7e1a09a78f336b94ae9f62ab84d2846d2e24602 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -64,8 +64,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > __cuda_callable__ DenseMatrixView( const IndexType rows, const IndexType columns, - const ValuesViewType& values, - const SegmentsViewType& segments ); + const ValuesViewType& values ); __cuda_callable__ DenseMatrixView( const DenseMatrixView& m ) = default; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 01415ec21c2446a255db57aae7df04cbe5813ed8..ddd9c93281b70a10d25e05768d409f41303e4774 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -35,10 +35,11 @@ __cuda_callable__ DenseMatrixView< Real, Device, Index, RowMajorOrder >:: DenseMatrixView( const IndexType rows, const IndexType columns, - const ValuesViewType& values, - const SegmentsViewType& segments ) - : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments ) + const ValuesViewType& values ) + : MatrixView< Real, Device, Index >( rows, columns, values ) { + SegmentsType a( rows, columns ); + segments = a.getView(); } template< typename Real, @@ -53,8 +54,7 @@ getView() -> ViewType return ViewType( this->getRows(), this->getColumns(), this->getValues().getView(), - this->columnIndexes.getView(), - this->segments.getView() ); + this->columnIndexes.getView() ); } template< typename Real, @@ -69,8 +69,7 @@ getConstView() const -> ConstViewType return ConstViewType( this->getRows(), this->getColumns(), this->getValues().getConstView(), - this->getColumnsIndexes().getConstView(), - this->segments.getConstView() ); + this->getColumnsIndexes().getConstView() ); } template< typename Real, diff --git a/src/TNL/Matrices/LambdaMatrix.h b/src/TNL/Matrices/LambdaMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..5f3ecdfb36bc447f44578c4c0493ae635b1e7cd4 --- /dev/null +++ b/src/TNL/Matrices/LambdaMatrix.h @@ -0,0 +1,164 @@ +/*************************************************************************** + LambdaMatrix.h - description + ------------------- + begin : Mar 17, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/String.h>
+#include <TNL/Devices/AnyDevice.h>
+
+namespace TNL {
+namespace Matrices {
+
+/**
+ * \brief "Matrix-free" matrix based on lambda functions.
+ *
+ * \tparam MatrixElementsLambda is a lambda function returning matrix element
+ *    values and positions.
+ * \tparam CompressedRowLengthsLambda is a lambda function returning the number
+ *    of non-zero elements in each row.
+ * \tparam Real is the type of the matrix element values.
+ * \tparam Device is the device on which the lambda functions can be evaluated.
+ *    Devices::AnyDevice can be used for lambdas with no device restriction.
+ * \tparam Index is the type used for indexing.
+ */
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real = double,
+          typename Device = Devices::AnyDevice,
+          typename Index = int >
+class LambdaMatrix
+{
+   public:
+      static constexpr bool isSymmetric() { return false; };
+      static constexpr bool isBinary() { return false; };
+
+      using RealType = Real;
+      using DeviceType = Device;
+      using IndexType = Index;
+
+      LambdaMatrix( MatrixElementsLambda& matrixElements,
+                    CompressedRowLengthsLambda& compressedRowLengths );
+
+      LambdaMatrix( const IndexType& rows,
+                    const IndexType& columns,
+                    MatrixElementsLambda& matrixElements,
+                    CompressedRowLengthsLambda& compressedRowLengths );
+
+      void setDimensions( const IndexType& rows,
+                          const IndexType& columns );
+
+      __cuda_callable__
+      IndexType getRows() const;
+
+      __cuda_callable__
+      IndexType getColumns() const;
+
+      template< typename Vector >
+      void getCompressedRowLengths( Vector& rowLengths ) const;
+
+      IndexType getNumberOfNonzeroMatrixElements() const;
+
+      RealType getElement( const IndexType row,
+                           const IndexType column ) const;
+
+      template< typename Vector >
+      __cuda_callable__
+      typename Vector::RealType rowVectorProduct( const IndexType row,
+                                                  const Vector& vector ) const;
+
+      /**
+       * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector
+       */
+      template< typename InVector,
+                typename OutVector >
+      void vectorProduct( const InVector& inVector,
+                          OutVector& outVector,
+                          const RealType& matrixMultiplicator = 1.0,
+                          const RealType& outVectorMultiplicator = 0.0 ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Fetch, typename Reduce, typename Keep, typename FetchReal >
+      void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function ) const;
+
+      template< typename Function >
+      void forRows( IndexType first, IndexType last, Function& function );
+
+      template< typename Function >
+      void forAllRows( Function& function ) const;
+
+      template< typename Function >
+      void forAllRows( Function& function );
+
+      template< typename Vector1, typename Vector2 >
+      void performSORIteration( const Vector1& b,
+                                const IndexType row,
+                                Vector2& x,
+                                const RealType& omega = 1.0 ) const;
+
+      void print( std::ostream& str ) const;
+
+   protected:
+
+      IndexType rows, columns;
+
+      MatrixElementsLambda matrixElementsLambda;
+
+      CompressedRowLengthsLambda compressedRowLengthsLambda;
+};
+
+
+/**
+ * \brief Helper class for creating instances of LambdaMatrix.
+ * @param matrixElementsLambda is a lambda function returning matrix element values and positions.
+ * @param compressedRowLengthsLambda is a lambda function returning the number of non-zero elements in each row.
+ * @return LambdaMatrix instance with the lambda types deduced from the arguments.
+ */
+template< typename Real = double,
+          typename Device = Devices::AnyDevice,
+          typename Index = int >
+struct LambdaMatrixFactory
+{
+   using RealType = Real;
+   using IndexType = Index;
+
+   template< typename MatrixElementsLambda,
+             typename CompressedRowLengthsLambda >
+   static auto create( MatrixElementsLambda& matrixElementsLambda,
+                       CompressedRowLengthsLambda& compressedRowLengthsLambda )
+   -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >
+   {
+      return LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >(
+         matrixElementsLambda,
+         compressedRowLengthsLambda );
+   };
+
+   template< typename MatrixElementsLambda,
+             typename CompressedRowLengthsLambda >
+   static auto create( const IndexType& rows,
+                       const IndexType& columns,
+                       MatrixElementsLambda& matrixElementsLambda,
+                       CompressedRowLengthsLambda& compressedRowLengthsLambda )
+   -> LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >
+   {
+      return LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >(
+         rows, columns,
+         matrixElementsLambda, compressedRowLengthsLambda );
+   };
+};
+
+} //namespace Matrices
+} //namespace TNL
+
+#include <TNL/Matrices/LambdaMatrix.hpp>
diff --git a/src/TNL/Matrices/LambdaMatrix.hpp b/src/TNL/Matrices/LambdaMatrix.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c992bd575bd7f2e09bbd50ded60191a90ceca297
--- /dev/null
+++ b/src/TNL/Matrices/LambdaMatrix.hpp
@@ -0,0 +1,359 @@
+/***************************************************************************
+                          LambdaMatrix.hpp  -  description
+                             -------------------
+    begin                : Mar 17, 2020
+    copyright            : (C) 2020 by Tomas Oberhuber
+    email                : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Matrices/LambdaMatrix.h>
+
+namespace TNL {
+namespace Matrices {
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+LambdaMatrix( MatrixElementsLambda& matrixElements,
+              CompressedRowLengthsLambda& compressedRowLengths )
+: rows( 0 ), columns( 0 ), matrixElementsLambda( matrixElements ), compressedRowLengthsLambda( compressedRowLengths )
+{
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+LambdaMatrix( const IndexType& rows,
+              const IndexType& columns,
+              MatrixElementsLambda& matrixElements,
+              CompressedRowLengthsLambda& compressedRowLengths )
+: rows( rows ), columns( columns ), matrixElementsLambda( matrixElements ), compressedRowLengthsLambda( compressedRowLengths )
+{
+}
+
+template< typename MatrixElementsLambda,
+          typename CompressedRowLengthsLambda,
+          typename Real,
+          typename Device,
+          typename Index >
+void
+LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >::
+setDimensions( const IndexType& rows,
+               const IndexType& columns )
+{
+   this->rows = rows;
+   this->columns = columns;
+}
+
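The LambdaMatrix interface introduced above is easiest to see in a short, self-contained example. The following host-only sketch is illustrative and not part of the patch; it mirrors the lambdas used by the unit tests added later in this patch (LambdaMatrixTest.hpp) and relies only on the interface declared above.

// Illustrative sketch only (not part of this patch): building a small
// 1D-Laplacian-like LambdaMatrix on the host and multiplying it by a vector.
#include <iostream>
#include <TNL/Containers/Vector.h>
#include <TNL/Devices/Host.h>
#include <TNL/Matrices/LambdaMatrix.h>

int main()
{
   using Real = double;
   using Device = TNL::Devices::Host;
   using Index = int;

   const Index size = 5;

   // Number of non-zero elements in each row: 1 in the first and last row, 3 elsewhere.
   auto rowLengths = [=] ( Index rows, Index columns, Index rowIdx ) -> Index {
      return ( rowIdx == 0 || rowIdx == size - 1 ) ? 1 : 3;
   };
   // Column index and value of the localIdx-th non-zero element in row rowIdx.
   auto matrixElements = [=] ( Index rows, Index columns, Index rowIdx,
                               Index localIdx, Index& columnIdx, Real& value ) {
      if( rowIdx == 0 || rowIdx == size - 1 ) {
         columnIdx = rowIdx;
         value = 1.0;
      }
      else {
         columnIdx = rowIdx + localIdx - 1;
         value = ( columnIdx == rowIdx ) ? -2.0 : 1.0;
      }
   };

   // The matrix type depends on the (unnameable) lambda types, so it is obtained
   // from the factory; for Devices::Cuda the lambdas would also be marked
   // __cuda_callable__, as in the unit tests.
   using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< Real, Device, Index >::create(
      matrixElements, rowLengths ) );
   MatrixType matrix( size, size, matrixElements, rowLengths );

   TNL::Containers::Vector< Real, Device, Index > x( size, 1.0 ), b( size, 0.0 );
   matrix.vectorProduct( x, b );                 // b = A * x
   for( Index i = 0; i < size; i++ )
      std::cout << b.getElement( i ) << " ";     // prints: 1 0 0 0 1
   std::cout << std::endl;
   return 0;
}
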
+template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +getRows() const +{ + return this->rows; +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +getColumns() const +{ + return this->columns; +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Vector > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType; + + rowLengths.setSize( this->getRows() ); + const IndexType rows = this->getRows(); + const IndexType columns = this->getColumns(); + auto rowLengthsView = rowLengths.getView(); + auto compressedRowLengths = this->compressedRowLengthsLambda; + + if( std::is_same< typename Vector::DeviceType, Device_ >::value ) + Algorithms::ParallelFor< Device_ >::exec( + ( IndexType ) 0, + this->getRows(), + [=] __cuda_callable__ ( const IndexType row ) mutable { + rowLengthsView[ row ] = compressedRowLengths( rows, columns, row ); + } ); + else + { + Containers::Vector< IndexType, Device_, IndexType > aux( this->getRows() ); + auto auxView = aux.getView(); + Algorithms::ParallelFor< Device_ >::exec( + ( IndexType ) 0, + this->getRows(), + [=] __cuda_callable__ ( const IndexType row ) mutable { + auxView[ row ] = compressedRowLengths( rows, columns, row ); + } ); + rowLengths = aux; + } +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +Index +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +getNumberOfNonzeroMatrixElements() const +{ + Containers::Vector< IndexType, typename Devices::PickDevice< DeviceType >::DeviceType, IndexType > rowLengthsVector; + this->getCompressedRowLengths( rowLengthsVector ); + return sum( rowLengthsVector ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +Real +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +getElement( const IndexType row, + const IndexType column ) const +{ + using Device_ = typename Devices::PickDevice< Devices::Host >::DeviceType; + Containers::Array< RealType, Device_ > value( 1 ); + auto valueView = value.getView(); + auto rowLengths = this->compressedRowLengthsLambda; + auto matrixElements = this->matrixElementsLambda; + const IndexType rows = this->getRows(); + const IndexType columns = this->getColumns(); + auto getValue = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + const IndexType rowSize = rowLengths( rows, columns, row ); + valueView[ 0 ] = 0.0; + for( IndexType localIdx = 0; localIdx < rowSize; localIdx++ ) + { + RealType elementValue; + IndexType elementColumn; + matrixElements( rows, columns, row, localIdx, elementColumn, elementValue ); + if( elementColumn == column ) + { + valueView[ 0 ] = elementValue; + break; + } + } + }; + Algorithms::ParallelFor< Device_ >::exec( row, row + 1, getValue ); + 
return valueView.getElement( 0 ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename InVector, + typename OutVector > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator ) const +{ + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType { + if( value == 0.0 ) + return 0.0; + return value * inVectorView[ columnIdx ]; + }; + auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + if( outVectorMultiplicator == 0.0 ) + outVectorView[ row ] = matrixMultiplicator * value; + else + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; + }; + this->allRowsReduction( fetch, reduce, keep, 0.0 ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + using FetchType = decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ); + using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType; + + const IndexType rows = this->getRows(); + const IndexType columns = this->getColumns(); + auto rowLengths = this->compressedRowLengthsLambda; + auto matrixElements = this->matrixElementsLambda; + auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + const IndexType rowLength = rowLengths( rows, columns, rowIdx ); + FetchType result( zero ); + for( IndexType localIdx = 0; localIdx < rowLength; localIdx++ ) + { + IndexType elementColumn( 0 ); + RealType elementValue( 0.0 ); + matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue ); + FetchType fetchValue( zero ); + if( elementValue != 0.0 ) + fetchValue = fetch( rowIdx, localIdx, elementColumn, elementValue ); + reduce( result, fetchValue ); + } + keep( rowIdx, result ); + }; + Algorithms::ParallelFor< Device_ >::exec( first, last, processRow ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +LambdaMatrix< 
MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Function > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + using FetchType = decltype( fetch( IndexType(), IndexType(), RealType(), IndexType() ) ); + using Device_ = typename Devices::PickDevice< DeviceType >::DeviceType; + + const IndexType rows = this->getRows(); + const IndexType columns = this->getColumns(); + auto rowLengths = this->compressedRowLengthsLambda; + auto matrixElements = this->matrixElementsLambda; + auto processRow = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + const IndexType rowLength = rowLengths( rows, columns, rowIdx ); + bool compute( true ); + for( IndexType localIdx = 0; localIdx < rowLength && compute; localIdx++ ) + { + IndexType elementColumn( 0 ); + RealType elementValue( 0.0 ); + matrixElements( rows, columns, rowIdx, localIdx, elementColumn, elementValue ); + if( elementValue != 0.0 ) + function( rowIdx, localIdx, elementColumn, elementValue, compute ); + } + }; + Algorithms::ParallelFor< Device_ >::exec( first, last, processRow ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Function > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > + template< typename Vector1, typename Vector2 > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + +} + +template< typename MatrixElementsLambda, + typename CompressedRowLengthsLambda, + typename Real, + typename Device, + typename Index > +void +LambdaMatrix< MatrixElementsLambda, CompressedRowLengthsLambda, Real, Device, Index >:: +print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = 0; column < this->getColumns(); column++ ) + { + auto value = this->getElement( row, column ); + if( value != ( RealType ) 0 ) + str << " Col:" << column << "->" << value << "\t"; + } + str << std::endl; + } +} + +} //namespace Matrices +} //namespace TNL diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index 032767518cc275e707961af48e474f4210256ef1..d012f918b82347b6aaa54f9eb3059e45805323c4 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -10,6 +10,7 @@ #pragma once +#include <map> #include <TNL/Matrices/Matrix.h> #include <TNL/Matrices/MatrixType.h> #include <TNL/Allocators/Default.h> @@ -92,6 +93,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > const RealAllocatorType& realAllocator = RealAllocatorType(), const IndexAllocatorType& indexAllocator = 
IndexAllocatorType() ); + template< typename MapIndex, + typename MapValue > + explicit SparseMatrix( const IndexType rows, + const IndexType columns, + const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ); + ViewType getView() const; // TODO: remove const ConstViewType getConstView() const; @@ -110,6 +117,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > void setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ); + template< typename MapIndex, + typename MapValue > + void setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ); + template< typename Vector > void getCompressedRowLengths( Vector& rowLengths ) const; @@ -225,6 +236,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > __cuda_callable__ IndexType getPaddingIndex() const; + SegmentsType& getSegments(); + + const SegmentsType& getSegments() const; + // TODO: restore it and also in Matrix // protected: @@ -234,8 +249,6 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > IndexAllocator indexAllocator; - //RealAllocator realAllocator; - ViewType view; }; diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index 4c1f3b1ce41d27adf2a804e7171ec21f89ba7313..3a557d188113484b320cd56a51f4303e191be05e 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -109,6 +109,24 @@ SparseMatrix( const IndexType rows, this->setElements( data ); } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename MapIndex, + typename MapValue > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const IndexType rows, + const IndexType columns, + const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ) +{ + this->setDimensions( rows, columns ); + this->setElements( map ); +} + template< typename Real, typename Device, typename Index, @@ -247,6 +265,38 @@ setElements( const std::initializer_list< std::tuple< IndexType, IndexType, Real ( *this ) = hostMatrix; } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename MapIndex, + typename MapValue > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setElements( const std::map< std::pair< MapIndex, MapIndex > , MapValue >& map ) +{ + Containers::Vector< IndexType, Devices::Host, IndexType > rowsCapacities( this->getRows(), 0 ); + for( auto element : map ) + rowsCapacities[ element.first.first ]++; + if( !std::is_same< DeviceType, Devices::Host >::value ) + { + SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( this->getRows(), this->getColumns() ); + hostMatrix.setCompressedRowLengths( rowsCapacities ); + for( auto element : map ) + hostMatrix.setElement( element.first.first, element.first.second, element.second ); + *this = hostMatrix; + } + else + { + this->setCompressedRowLengths( rowsCapacities ); + for( auto element : map ) + this->setElement( element.first.first, element.first.second, element.second ); + } +} + template< typename Real, typename Device, typename Index, @@ -816,7 +866,7 @@ operator=( const RHSMatrix& matrix ) 
const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; matrixValuesBuffer_view[ bufferIdx ] = value; - //std::cerr << " <<<<< rowIdx = " << rowIdx << " localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl; + //printf( "TO BUFFER: rowIdx = %d localIdx = %d bufferIdx = %d column = %d value = %d \n", rowIdx, localIdx, bufferIdx, columnIndex, value ); } }; matrix.forRows( baseRow, lastRow, f1 ); @@ -951,5 +1001,33 @@ getPaddingIndex() const return -1; } +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getSegments() -> SegmentsType& +{ + return this->segments; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getSegments() const -> const SegmentsType& +{ + return this->segments; +} + } //namespace Matrices } // namespace TNL diff --git a/src/TNL/String.h b/src/TNL/String.h index f35abc377177b6b061b68074714ce3e143b55d22..228cb5bcb18d07cdf493ff9fdbc36c9539baef63 100644 --- a/src/TNL/String.h +++ b/src/TNL/String.h @@ -375,7 +375,7 @@ String convertToString( const T& value ) /** * \brief Specialization of function \ref convertToString for boolean. * - * The boolean type is converted to 'true' ot 'false'. + * The boolean type is converted to 'true' or 'false'. 
*/ template<> inline String convertToString( const bool& b ) { diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index 255a67fb911b995ea409c341fe9a00104b5d95bf..4f6fd7c92c8ea1198e6c9e521c26951df5060e15 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -149,6 +149,11 @@ TYPED_TEST( ArrayTest, constructors ) v.reset(); EXPECT_EQ( w.getSize(), 10 ); + Containers::Array< int > int_array( 10, 1 ); + ArrayType int_array_copy( int_array ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( int_array_copy.getElement( i ), 1 ); + ArrayType a1 { 1, 2, 3 }; EXPECT_EQ( a1.getElement( 0 ), 1 ); EXPECT_EQ( a1.getElement( 1 ), 2 ); diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index b6602ba141dbeb3a45470c58cf6130544b85d944..ca495abba4ab720deed2d7eb2c5d482229973fdc 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -72,6 +72,12 @@ TYPED_TEST( VectorTest, constructors ) EXPECT_EQ( a3.getElement( 0 ), 7 ); EXPECT_EQ( a3.getElement( 1 ), 8 ); EXPECT_EQ( a3.getElement( 2 ), 9 ); + + VectorType a4( 2 * a2 + 3 * a3 ); + EXPECT_EQ( a4.getElement( 0 ), 2.0 * a2.getElement( 0 ) + 3 * a3.getElement( 0 ) ); + EXPECT_EQ( a4.getElement( 1 ), 2.0 * a2.getElement( 1 ) + 3 * a3.getElement( 1 ) ); + EXPECT_EQ( a4.getElement( 2 ), 2.0 * a2.getElement( 2 ) + 3 * a3.getElement( 2 ) ); + } TEST( VectorSpecialCasesTest, defaultConstructors ) diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 1c536a98210b59789d2a7b34a9b9935150a7e0ac..f2ffd0c4bab1af47284ee98a09ff1f1002c5647b 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -22,6 +22,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) @@ -40,6 +43,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( LambdaMatrixTest LambdaMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( LambdaMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) @@ -69,6 +75,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest 
${GTEST_BOTH_LIBRARIES} ) @@ -93,6 +103,10 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SymmetricSparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( LambdaMatrixTest LambdaMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( LambdaMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( LambdaMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) @@ -102,12 +116,14 @@ ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixT ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( LambdaMatrixTest ${EXECUTABLE_OUTPUT_PATH}/LambdaMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 37ae58bf1a1e7e8b03220c4916ba79cf48729ef9..8791b51fa6d8eb14a79f032a0cac0d1d91c653fd 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -46,42 +46,68 @@ void test_GetSerializationType() template< typename Matrix > void test_SetDimensions() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - const IndexType rows = 9; - const IndexType cols = 8; + const IndexType rows = 9; + const IndexType cols = 8; - Matrix m; - m.setDimensions( rows, cols ); + Matrix m; + m.setDimensions( rows, cols ); - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); } template< typename Matrix1, typename Matrix2 > void test_SetLike() { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; - const IndexType rows = 8; - const IndexType cols = 7; + const IndexType rows = 8; + const IndexType cols = 7; - Matrix1 m1; - m1.reset(); - m1.setDimensions( rows + 1, cols + 2 ); + 
Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); - Matrix2 m2; - m2.reset(); - m2.setDimensions( rows, cols ); + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} - m1.setLike( m2 ); +template< typename Matrix > +void test_SetElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); + Matrix m( { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 }, + } ); + + EXPECT_EQ( m.getRows(), 3 ); + EXPECT_EQ( m.getColumns(), 3 ); + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 1, 0 ), 4 ); + EXPECT_EQ( m.getElement( 1, 1 ), 5 ); + EXPECT_EQ( m.getElement( 1, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 0 ), 7 ); + EXPECT_EQ( m.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m.getElement( 2, 2 ), 9 ); } template< typename Matrix > @@ -1386,6 +1412,13 @@ TYPED_TEST( MatrixTest, setLikeTest ) test_SetLike< MatrixType, MatrixType >(); } +TYPED_TEST( MatrixTest, setElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElements< MatrixType >(); +} + TYPED_TEST( MatrixTest, getRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.cpp b/src/UnitTests/Matrices/LambdaMatrixTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9da03ede3efa0addfc30c3245f26f35971d99514 --- /dev/null +++ b/src/UnitTests/Matrices/LambdaMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + LambdaMatrixTest.cpp - description + ------------------- + begin : Mar 18, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "LambdaMatrixTest.h" diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.cu b/src/UnitTests/Matrices/LambdaMatrixTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..f7af7af4160ceaf59c39b95759936560bf0868f0 --- /dev/null +++ b/src/UnitTests/Matrices/LambdaMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + LambdaMatrixTest.cu - description + ------------------- + begin : Mar 18, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "LambdaMatrixTest.h" diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.h b/src/UnitTests/Matrices/LambdaMatrixTest.h new file mode 100644 index 0000000000000000000000000000000000000000..07d1f336c4d5cc76c74bf19711fc4d2b68d3684b --- /dev/null +++ b/src/UnitTests/Matrices/LambdaMatrixTest.h @@ -0,0 +1,115 @@ +/*************************************************************************** + LambdaMatrixTest.h - description + ------------------- + begin : Mar 18, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> +#include <TNL/Matrices/LambdaMatrix.h> +#include <TNL/Devices/AnyDevice.h> +#include <TNL/Devices/Host.h> +#include "LambdaMatrixTest.hpp" +#include <iostream> + +template< typename Real, + typename Device, + typename Index > +struct LambdaMatrixParameters +{ + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; +}; + + +// test fixture for typed tests +template< typename Matrix > +class LambdaMatrixTest : public ::testing::Test +{ +protected: + using LambdaMatrixType = Matrix; +}; + + +// types for which MatrixTest is instantiated +using LambdaMatrixTypes = ::testing::Types +< + LambdaMatrixParameters< int, TNL::Devices::Host, int >, + LambdaMatrixParameters< long, TNL::Devices::Host, int >, + LambdaMatrixParameters< float, TNL::Devices::Host, int >, + LambdaMatrixParameters< double, TNL::Devices::Host, int >, + LambdaMatrixParameters< int, TNL::Devices::Host, long >, + LambdaMatrixParameters< long, TNL::Devices::Host, long >, + LambdaMatrixParameters< float, TNL::Devices::Host, long >, + LambdaMatrixParameters< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,LambdaMatrixParameters< int, TNL::Devices::Cuda, int >, + LambdaMatrixParameters< long, TNL::Devices::Cuda, int >, + LambdaMatrixParameters< float, TNL::Devices::Cuda, int >, + LambdaMatrixParameters< double, TNL::Devices::Cuda, int >, + LambdaMatrixParameters< int, TNL::Devices::Cuda, long >, + LambdaMatrixParameters< long, TNL::Devices::Cuda, long >, + LambdaMatrixParameters< float, TNL::Devices::Cuda, long >, + LambdaMatrixParameters< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( LambdaMatrixTest, LambdaMatrixTypes); + +TYPED_TEST( LambdaMatrixTest, Constructors ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_Constructors< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, setDimensionsTest ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_SetDimensions< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, getCompressedRowLengthsTest ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_GetCompressedRowLengths< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, getElementTest ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_GetElement< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, vectorProductTest ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_VectorProduct< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, rowsReduction ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_RowsReduction< LambdaMatrixParametersType >(); +} + +TYPED_TEST( LambdaMatrixTest, printTest ) +{ + using LambdaMatrixParametersType = typename TestFixture::LambdaMatrixType; + + test_Print< LambdaMatrixParametersType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/LambdaMatrixTest.hpp b/src/UnitTests/Matrices/LambdaMatrixTest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..23963c11906431fa6f80926cb21c7d5d12913c51 --- /dev/null +++ b/src/UnitTests/Matrices/LambdaMatrixTest.hpp @@ -0,0 +1,306 @@ 
+/*************************************************************************** + LambdaMatrixTest.h - description + ------------------- + begin : Mar 18, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <iostream> +#include <sstream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +template< typename Matrix > +void test_Constructors() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { return 1; }; + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + columnIdx = rowIdx; + value = 1.0; + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType m( size, size, matrixElements, rowLengths ); + + EXPECT_EQ( m.getRows(), size ); + EXPECT_EQ( m.getColumns(), size ); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { return 1; }; + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + columnIdx = rowIdx; + value = 1.0; + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType m( size, size, matrixElements, rowLengths ); + + EXPECT_EQ( m.getRows(), size ); + EXPECT_EQ( m.getColumns(), size ); + + m.setDimensions( 10, 10 ); + EXPECT_EQ( m.getRows(), 10 ); + EXPECT_EQ( m.getColumns(), 10 ); + +} + +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { + if( rowIdx == 0 || rowIdx == size - 1 ) + return 1; + return 3; + }; + + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + if( rowIdx == 0 || rowIdx == size -1 ) + { + columnIdx = rowIdx; + value = 1.0; + } + else + { + columnIdx = rowIdx + localIdx - 1; + value = ( columnIdx == rowIdx ) ? 
-2.0 : 1.0; + } + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType m( size, size, matrixElements, rowLengths ); + TNL::Containers::Vector< IndexType > correctRowLengths{ 1, 3, 3, 3, 1 }; + TNL::Containers::Vector< IndexType > rowLengthsVector; + m.getCompressedRowLengths( rowLengthsVector ); + for( int i = 0; i < size; i++ ) + EXPECT_EQ( correctRowLengths[ i ], rowLengthsVector[ i ] ); +} + +template< typename Matrix > +void test_GetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { + if( rowIdx == 0 || rowIdx == size - 1 ) + return 1; + return 3; + }; + + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + if( rowIdx == 0 || rowIdx == size -1 ) + { + columnIdx = rowIdx; + value = 1.0; + } + else + { + columnIdx = rowIdx + localIdx - 1; + value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; + } + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType m( size, size, matrixElements, rowLengths ); + EXPECT_EQ( m.getElement( 0, 0 ), 1.0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0.0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0.0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0.0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0.0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1.0 ); + EXPECT_EQ( m.getElement( 1, 1 ), -2.0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1.0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0.0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0.0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0.0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1.0 ); + EXPECT_EQ( m.getElement( 2, 2 ), -2.0 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1.0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0.0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0.0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0.0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1.0 ); + EXPECT_EQ( m.getElement( 3, 3 ), -2.0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 1.0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0.0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0.0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0.0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0.0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1.0 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { + if( rowIdx == 0 || rowIdx == size - 1 ) + return 1; + return 3; + }; + + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + if( rowIdx == 0 || rowIdx == size -1 ) + { + columnIdx = rowIdx; + value = 1.0; + } + else + { + columnIdx = rowIdx + localIdx - 1; + value = ( columnIdx == rowIdx ) ? 
-2.0 : 1.0; + } + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType A( size, size, matrixElements, rowLengths ); + TNL::Containers::Vector< RealType, DeviceType, IndexType > x( size, 1.0 ), b( size, 5.0 ); + A.vectorProduct( x, b ); + EXPECT_EQ( b.getElement( 0 ), 1.0 ); + EXPECT_EQ( b.getElement( 1 ), 0.0 ); + EXPECT_EQ( b.getElement( 2 ), 0.0 ); + EXPECT_EQ( b.getElement( 3 ), 0.0 ); + EXPECT_EQ( b.getElement( 4 ), 1.0 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { + if( rowIdx == 0 || rowIdx == size - 1 ) + return 1; + return 3; + }; + + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + if( rowIdx == 0 || rowIdx == size -1 ) + { + columnIdx = rowIdx; + value = 1.0; + } + else + { + columnIdx = rowIdx + localIdx - 1; + value = ( columnIdx == rowIdx ) ? -2.0 : 1.0; + } + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType A( size, size, matrixElements, rowLengths ); + TNL::Containers::Vector< RealType, DeviceType, IndexType > v( size, -1.0 ); + auto vView = v.getView(); + + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType columnIdx, const RealType& value ) mutable -> RealType { + return value; + }; + auto reduce = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keep = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + vView[ row ] = value; + }; + A.allRowsReduction( fetch, reduce, keep, 0.0 ); + + EXPECT_EQ( v.getElement( 0 ), 1.0 ); + EXPECT_EQ( v.getElement( 1 ), 0.0 ); + EXPECT_EQ( v.getElement( 2 ), 0.0 ); + EXPECT_EQ( v.getElement( 3 ), 0.0 ); + EXPECT_EQ( v.getElement( 4 ), 1.0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + IndexType size = 5; + auto rowLengths = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx ) -> IndexType { + if( rowIdx == 0 || rowIdx == size - 1 ) + return 1; + return 3; + }; + + auto matrixElements = [=] __cuda_callable__ ( const IndexType rows, const IndexType columns, const IndexType rowIdx, const IndexType localIdx, IndexType& columnIdx, RealType& value ) { + if( rowIdx == 0 || rowIdx == size -1 ) + { + columnIdx = rowIdx; + value = 1.0; + } + else + { + columnIdx = rowIdx + localIdx - 1; + value = ( columnIdx == rowIdx ) ? 
-2.0 : 1.0; + } + }; + + using MatrixType = decltype( TNL::Matrices::LambdaMatrixFactory< RealType, DeviceType, IndexType >::create( matrixElements, rowLengths ) ); + + MatrixType m( size, size, matrixElements, rowLengths ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1\t\n" + "Row: 1 -> Col:0->1 Col:1->-2 Col:2->1\t\n" + "Row: 2 -> Col:1->1 Col:2->-2 Col:3->1\t\n" + "Row: 3 -> Col:2->1 Col:3->-2 Col:4->1\t\n" + "Row: 4 -> Col:4->1\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + + +#endif // HAVE_GTEST diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 30d3a692d1a843e90600bffa560314535762e7ad..12cdbeef3fca46946193ff95f7a9f8ab455e0d19 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -1,13 +1,15 @@ /*************************************************************************** SparseMatrixTest.h - description ------------------- - begin : Nov 22, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ +#pragma once + #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> #include <TNL/Math.h> @@ -15,1391 +17,103 @@ #include <iostream> #include <sstream> +#include "SparseMatrixTest.hpp" + #ifdef HAVE_GTEST #include <gtest/gtest.h> -template< typename MatrixHostFloat, typename MatrixHostInt > -void host_test_GetType() -{ - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; -} - -template< typename MatrixCudaFloat, typename MatrixCudaInt > -void cuda_test_GetType() -{ - bool testRan = false; - EXPECT_TRUE( testRan ); - std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; -} - +// test fixture for typed tests template< typename Matrix > -void test_Constructors() +class MatrixTest : public ::testing::Test { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - Matrix m1( 5, 6 ); - EXPECT_EQ( m1.getRows(), 5 ); - EXPECT_EQ( m1.getColumns(), 6 ); - - Matrix m2( {1, 2, 2, 2, 1 }, 5 ); - typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; - m2.setElement( 0, 0, 1 ); // 0th row - m2.setElement( 1, 0, 1 ); // 1st row - m2.setElement( 1, 1, 1 ); - m2.setElement( 2, 1, 1 ); // 2nd row - m2.setElement( 2, 2, 1 ); - m2.setElement( 3, 2, 1 ); // 3rd row - m2.setElement( 3, 3, 1 ); - m2.setElement( 4, 4, 1 ); // 4th row - m2.getCompressedRowLengths( v1 ); - - EXPECT_EQ( v1, v2 ); - - /* - * Sets up the following 6x5 sparse matrix: - * - * / 1 2 3 0 0 \ - * | 0 4 5 6 0 | - * | 0 0 7 8 9 | - * | 10 0 0 0 0 | - * | 0 11 0 0 0 | - * \ 0 0 0 12 0 / - */ - - Matrix m3( 6, 5, { - { 0, 0, 1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 }, - { 1, 1, 4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 }, - { 2, 2, 7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 }, - { 3, 0, 10.0 }, - { 4, 1, 11.0 }, - { 5, 3, 12.0 } } ); - - // Check the set elements - EXPECT_EQ( m3.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m3.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m3.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m3.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m3.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m3.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m3.getElement( 1, 1 ), 4 ); - EXPECT_EQ( m3.getElement( 1, 2 ), 5 ); - EXPECT_EQ( m3.getElement( 1, 3 ), 6 ); - EXPECT_EQ( m3.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m3.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m3.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m3.getElement( 2, 2 ), 7 ); - EXPECT_EQ( m3.getElement( 2, 3 ), 8 ); - EXPECT_EQ( m3.getElement( 2, 4 ), 9 ); - - EXPECT_EQ( m3.getElement( 3, 0 ), 10 ); - EXPECT_EQ( m3.getElement( 3, 1 ), 0 ); - EXPECT_EQ( m3.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m3.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m3.getElement( 3, 4 ), 0 ); +protected: + using MatrixType = Matrix; +}; - EXPECT_EQ( m3.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m3.getElement( 4, 1 ), 11 ); - EXPECT_EQ( m3.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m3.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m3.getElement( 4, 4 ), 0 ); +TYPED_TEST_SUITE( MatrixTest, MatrixTypes); - EXPECT_EQ( m3.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m3.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m3.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m3.getElement( 5, 3 ), 12 ); - EXPECT_EQ( m3.getElement( 5, 4 ), 0 ); -} - -template< typename Matrix > -void test_SetDimensions() +TYPED_TEST( MatrixTest, Constructors ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - const IndexType rows = 9; - const IndexType cols = 8; + using MatrixType = typename TestFixture::MatrixType; - Matrix m; - m.setDimensions( rows, cols ); - - EXPECT_EQ( m.getRows(), 9 ); - EXPECT_EQ( m.getColumns(), 8 ); + test_Constructors< MatrixType >(); } -template< typename Matrix > -void test_SetCompressedRowLengths() +TYPED_TEST( MatrixTest, setDimensionsTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - const IndexType rows = 10; - const IndexType cols = 11; - - Matrix m( rows, cols ); - typename 
Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); - - IndexType rowLength = 1; - for( IndexType i = 2; i < rows; i++ ) - rowLengths.setElement( i, rowLength++ ); - - m.setCompressedRowLengths( rowLengths ); - - // Insert values into the rows. - RealType value = 1; - - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) // 1st row - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - for( IndexType i = 0; i < 2; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) // 4th row - m.setElement( 4, i, value++ ); + using MatrixType = typename TestFixture::MatrixType; - for( IndexType i = 0; i < 4; i++ ) // 5th row - m.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 5; i++ ) // 6th row - m.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 6; i++ ) // 7th row - m.setElement( 7, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 8th row - m.setElement( 8, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 9th row - m.setElement( 9, i, value++ ); - - rowLengths = 0; - m.getCompressedRowLengths( rowLengths ); - typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; - EXPECT_EQ( rowLengths, correctRowLengths ); + test_SetDimensions< MatrixType >(); } -template< typename Matrix1, typename Matrix2 > -void test_SetLike() +TYPED_TEST( MatrixTest, setCompressedRowLengthsTest ) { - using RealType = typename Matrix1::RealType; - using DeviceType = typename Matrix1::DeviceType; - using IndexType = typename Matrix1::IndexType; + using MatrixType = typename TestFixture::MatrixType; - const IndexType rows = 8; - const IndexType cols = 7; - - Matrix1 m1( rows + 1, cols + 2 ); - Matrix2 m2( rows, cols ); - - m1.setLike( m2 ); - - EXPECT_EQ( m1.getRows(), m2.getRows() ); - EXPECT_EQ( m1.getColumns(), m2.getColumns() ); + test_SetCompressedRowLengths< MatrixType >(); } -template< typename Matrix > -void test_GetNumberOfNonzeroMatrixElements() +TYPED_TEST( MatrixTest, setLikeTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); - - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); + using MatrixType = typename TestFixture::MatrixType; - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); - - for( IndexType j = 8; j < rows; j++) - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); - - EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); + test_SetLike< MatrixType, MatrixType >(); } -template< 
typename Matrix > -void test_Reset() +TYPED_TEST( MatrixTest, resetTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 5x4 sparse matrix: - * - * / 0 0 0 0 \ - * | 0 0 0 0 | - * | 0 0 0 0 | - * | 0 0 0 0 | - * \ 0 0 0 0 / - */ + using MatrixType = typename TestFixture::MatrixType; - const IndexType rows = 5; - const IndexType cols = 4; - - Matrix m( rows, cols ); - m.reset(); - - EXPECT_EQ( m.getRows(), 0 ); - EXPECT_EQ( m.getColumns(), 0 ); + test_Reset< MatrixType >(); } -template< typename Matrix > -void test_GetRow() +TYPED_TEST( MatrixTest, getRowTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); - - auto matrixView = m.getView(); - auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { - auto row = matrixView.getRow( rowIdx ); - RealType val; - switch( rowIdx ) - { - case 0: - val = 1; - for( IndexType i = 0; i < 4; i++ ) - row.setElement( i, 2 * i, val++ ); - break; - case 1: - val = 5; - for( IndexType i = 0; i < 3; i++ ) - row.setElement( i, i, val++ ); - break; - case 2: - val = 8; - for( IndexType i = 0; i < 8; i++ ) - row.setElement( i, i, val++ ); - break; - case 3: - val = 16; - for( IndexType i = 0; i < 2; i++ ) - row.setElement( i, i, val++ ); - break; - case 4: - row.setElement( 0, 0, 18 ); - break; - case 5: - row.setElement( 0, 0, 19 ); - break; - case 6: - row.setElement( 0, 0, 20 ); - break; - case 7: - row.setElement( 0, 0, 21 ); - break; - case 8: - val = 22; - for( IndexType i = 0; i < rows; i++ ) - row.setElement( i, i, val++ ); - break; - case 9: - val = 32; - for( IndexType i = 0; i < rows; i++ ) - row.setElement( i, i, val++ ); - break; - } - }; - TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); - - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 2 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 3 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 4 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 5 ); - EXPECT_EQ( m.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m.getElement( 1, 2 ), 7 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 8 ); - EXPECT_EQ( m.getElement( 2, 1 ), 9 ); - EXPECT_EQ( m.getElement( 2, 2 ), 10 ); - EXPECT_EQ( m.getElement( 2, 3 ), 11 ); - EXPECT_EQ( m.getElement( 2, 4 ), 12 ); - EXPECT_EQ( 
m.getElement( 2, 5 ), 13 ); - EXPECT_EQ( m.getElement( 2, 6 ), 14 ); - EXPECT_EQ( m.getElement( 2, 7 ), 15 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + using MatrixType = typename TestFixture::MatrixType; - EXPECT_EQ( m.getElement( 4, 0 ), 18 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 19 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 20 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 21 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 22 ); - EXPECT_EQ( m.getElement( 8, 1 ), 23 ); - EXPECT_EQ( m.getElement( 8, 2 ), 24 ); - EXPECT_EQ( m.getElement( 8, 3 ), 25 ); - EXPECT_EQ( m.getElement( 8, 4 ), 26 ); - EXPECT_EQ( m.getElement( 8, 5 ), 27 ); - EXPECT_EQ( m.getElement( 8, 6 ), 28 ); - EXPECT_EQ( m.getElement( 8, 7 ), 29 ); - EXPECT_EQ( m.getElement( 8, 8 ), 30 ); - EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 32 ); - EXPECT_EQ( m.getElement( 9, 1 ), 33 ); - EXPECT_EQ( m.getElement( 9, 2 ), 34 ); - EXPECT_EQ( m.getElement( 9, 3 ), 35 ); - EXPECT_EQ( m.getElement( 9, 4 ), 36 ); - EXPECT_EQ( m.getElement( 9, 5 ), 37 ); - EXPECT_EQ( m.getElement( 9, 6 ), 38 ); - EXPECT_EQ( m.getElement( 9, 7 ), 39 ); - EXPECT_EQ( m.getElement( 9, 8 ), 40 ); - EXPECT_EQ( m.getElement( 9, 9 ), 41 ); + test_GetRow< MatrixType >(); } - -template< typename Matrix > -void test_SetElement() +TYPED_TEST( MatrixTest, setElementTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 10x10 sparse matrix: - * - * / 1 0 2 0 3 0 4 0 0 0 \ - * | 5 6 7 0 0 0 0 0 0 0 | - * | 8 9 10 11 12 13 14 15 0 0 | - * | 16 17 0 0 0 0 0 0 0 0 | - * | 18 0 0 0 0 
0 0 0 0 0 | - * | 19 0 0 0 0 0 0 0 0 0 | - * | 20 0 0 0 0 0 0 0 0 0 | - * | 21 0 0 0 0 0 0 0 0 0 | - * | 22 23 24 25 26 27 28 29 30 31 | - * \ 32 33 34 35 36 37 38 39 40 41 / - */ - - const IndexType rows = 10; - const IndexType cols = 10; - - Matrix m; - m.reset(); - - m.setDimensions( rows, cols ); - - typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < 4; i++ ) - m.setElement( 0, 2 * i, value++ ); - - for( IndexType i = 0; i < 3; i++ ) - m.setElement( 1, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) - m.setElement( 2, i, value++ ); - - for( IndexType i = 0; i < 2; i++ ) - m.setElement( 3, i, value++ ); - - for( IndexType i = 4; i < 8; i++ ) - m.setElement( i, 0, value++ ); + using MatrixType = typename TestFixture::MatrixType; - for( IndexType j = 8; j < rows; j++) - for( IndexType i = 0; i < cols; i++ ) - m.setElement( j, i, value++ ); - - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 0 ); - EXPECT_EQ( m.getElement( 0, 2 ), 2 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 3 ); - EXPECT_EQ( m.getElement( 0, 5 ), 0 ); - EXPECT_EQ( m.getElement( 0, 6 ), 4 ); - EXPECT_EQ( m.getElement( 0, 7 ), 0 ); - EXPECT_EQ( m.getElement( 0, 8 ), 0 ); - EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 5 ); - EXPECT_EQ( m.getElement( 1, 1 ), 6 ); - EXPECT_EQ( m.getElement( 1, 2 ), 7 ); - EXPECT_EQ( m.getElement( 1, 3 ), 0 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - EXPECT_EQ( m.getElement( 1, 5 ), 0 ); - EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - EXPECT_EQ( m.getElement( 1, 7 ), 0 ); - EXPECT_EQ( m.getElement( 1, 8 ), 0 ); - EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 8 ); - EXPECT_EQ( m.getElement( 2, 1 ), 9 ); - EXPECT_EQ( m.getElement( 2, 2 ), 10 ); - EXPECT_EQ( m.getElement( 2, 3 ), 11 ); - EXPECT_EQ( m.getElement( 2, 4 ), 12 ); - EXPECT_EQ( m.getElement( 2, 5 ), 13 ); - EXPECT_EQ( m.getElement( 2, 6 ), 14 ); - EXPECT_EQ( m.getElement( 2, 7 ), 15 ); - EXPECT_EQ( m.getElement( 2, 8 ), 0 ); - EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 0 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - EXPECT_EQ( m.getElement( 3, 5 ), 0 ); - EXPECT_EQ( m.getElement( 3, 6 ), 0 ); - EXPECT_EQ( m.getElement( 3, 7 ), 0 ); - EXPECT_EQ( m.getElement( 3, 8 ), 0 ); - EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 18 ); - EXPECT_EQ( m.getElement( 4, 1 ), 0 ); - EXPECT_EQ( m.getElement( 4, 2 ), 0 ); - EXPECT_EQ( m.getElement( 4, 3 ), 0 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - EXPECT_EQ( m.getElement( 4, 5 ), 0 ); - EXPECT_EQ( m.getElement( 4, 6 ), 0 ); - EXPECT_EQ( m.getElement( 4, 7 ), 0 ); - EXPECT_EQ( m.getElement( 4, 8 ), 0 ); - EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 19 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 0 ); - EXPECT_EQ( m.getElement( 5, 3 ), 0 ); - EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - EXPECT_EQ( m.getElement( 5, 5 ), 0 ); - EXPECT_EQ( m.getElement( 5, 6 ), 0 ); - EXPECT_EQ( m.getElement( 5, 7 ), 0 ); - EXPECT_EQ( m.getElement( 5, 8 ), 0 ); - EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 6, 0 ), 20 ); - EXPECT_EQ( m.getElement( 6, 1 ), 0 ); - EXPECT_EQ( m.getElement( 6, 2 ), 0 ); - EXPECT_EQ( 
m.getElement( 6, 3 ), 0 ); - EXPECT_EQ( m.getElement( 6, 4 ), 0 ); - EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - EXPECT_EQ( m.getElement( 6, 6 ), 0 ); - EXPECT_EQ( m.getElement( 6, 7 ), 0 ); - EXPECT_EQ( m.getElement( 6, 8 ), 0 ); - EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 7, 0 ), 21 ); - EXPECT_EQ( m.getElement( 7, 1 ), 0 ); - EXPECT_EQ( m.getElement( 7, 2 ), 0 ); - EXPECT_EQ( m.getElement( 7, 3 ), 0 ); - EXPECT_EQ( m.getElement( 7, 4 ), 0 ); - EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - EXPECT_EQ( m.getElement( 7, 6 ), 0 ); - EXPECT_EQ( m.getElement( 7, 7 ), 0 ); - EXPECT_EQ( m.getElement( 7, 8 ), 0 ); - EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - - EXPECT_EQ( m.getElement( 8, 0 ), 22 ); - EXPECT_EQ( m.getElement( 8, 1 ), 23 ); - EXPECT_EQ( m.getElement( 8, 2 ), 24 ); - EXPECT_EQ( m.getElement( 8, 3 ), 25 ); - EXPECT_EQ( m.getElement( 8, 4 ), 26 ); - EXPECT_EQ( m.getElement( 8, 5 ), 27 ); - EXPECT_EQ( m.getElement( 8, 6 ), 28 ); - EXPECT_EQ( m.getElement( 8, 7 ), 29 ); - EXPECT_EQ( m.getElement( 8, 8 ), 30 ); - EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - - EXPECT_EQ( m.getElement( 9, 0 ), 32 ); - EXPECT_EQ( m.getElement( 9, 1 ), 33 ); - EXPECT_EQ( m.getElement( 9, 2 ), 34 ); - EXPECT_EQ( m.getElement( 9, 3 ), 35 ); - EXPECT_EQ( m.getElement( 9, 4 ), 36 ); - EXPECT_EQ( m.getElement( 9, 5 ), 37 ); - EXPECT_EQ( m.getElement( 9, 6 ), 38 ); - EXPECT_EQ( m.getElement( 9, 7 ), 39 ); - EXPECT_EQ( m.getElement( 9, 8 ), 40 ); - EXPECT_EQ( m.getElement( 9, 9 ), 41 ); + test_SetElement< MatrixType >(); } -template< typename Matrix > -void test_AddElement() +TYPED_TEST( MatrixTest, addElementTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 6x5 sparse matrix: - * - * / 1 2 3 0 0 \ - * | 0 4 5 6 0 | - * | 0 0 7 8 9 | - * | 10 1 1 0 0 | - * | 0 11 1 1 0 | - * \ 0 0 1 12 1 / - */ - - const IndexType rows = 6; - const IndexType cols = 5; - - Matrix m( rows, cols, { - { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, - { 1, 1, 4 }, { 1, 2, 5 }, { 1, 3, 6 }, - { 2, 2, 7 }, { 2, 3, 8 }, { 2, 4, 9 }, - { 3, 0, 10 }, { 3, 1, 1 }, { 3, 2, 1 }, - { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3, 1 }, - { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } ); - /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < cols - 2; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 1st row - m.setElement( 1, i, value++ ); + using MatrixType = typename TestFixture::MatrixType; - for( IndexType i = 2; i < cols; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - m.setElement( 3, 0, value++ ); // 3rd row - - m.setElement( 4, 1, value++ ); // 4th row - - m.setElement( 5, 3, value++ ); // 5th row*/ - - - // Check the set elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m.getElement( 1, 1 ), 4 ); - EXPECT_EQ( m.getElement( 1, 2 ), 5 ); - EXPECT_EQ( m.getElement( 1, 3 ), 6 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 7 ); - EXPECT_EQ( m.getElement( 2, 3 ), 8 ); - EXPECT_EQ( m.getElement( 2, 4 ), 9 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 10 
); - EXPECT_EQ( m.getElement( 3, 1 ), 1 ); - EXPECT_EQ( m.getElement( 3, 2 ), 1 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 11 ); - EXPECT_EQ( m.getElement( 4, 2 ), 1 ); - EXPECT_EQ( m.getElement( 4, 3 ), 1 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 1 ); - EXPECT_EQ( m.getElement( 5, 3 ), 12 ); - EXPECT_EQ( m.getElement( 5, 4 ), 1 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. - /* - * The following setup results in the following 6x5 sparse matrix: - * - * / 3 6 9 0 0 \ - * | 0 12 15 18 0 | - * | 0 0 21 24 27 | - * | 30 13 14 0 0 | - * | 0 35 16 17 0 | - * \ 0 0 18 41 20 / - */ - - RealType newValue = 1; - for( IndexType i = 0; i < cols - 2; i++ ) // 0th row - m.addElement( 0, i, newValue++, 2.0 ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 1st row - m.addElement( 1, i, newValue++, 2.0 ); - - for( IndexType i = 2; i < cols; i++ ) // 2nd row - m.addElement( 2, i, newValue++, 2.0 ); - - for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row - m.addElement( 3, i, newValue++, 2.0 ); - - for( IndexType i = 1; i < cols - 1; i++ ) // 4th row - m.addElement( 4, i, newValue++, 2.0 ); - - for( IndexType i = 2; i < cols; i++ ) // 5th row - m.addElement( 5, i, newValue++, 2.0 ); - - - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 0 ); - EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 0 ); - EXPECT_EQ( m.getElement( 1, 1 ), 12 ); - EXPECT_EQ( m.getElement( 1, 2 ), 15 ); - EXPECT_EQ( m.getElement( 1, 3 ), 18 ); - EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 0 ); - EXPECT_EQ( m.getElement( 2, 1 ), 0 ); - EXPECT_EQ( m.getElement( 2, 2 ), 21 ); - EXPECT_EQ( m.getElement( 2, 3 ), 24 ); - EXPECT_EQ( m.getElement( 2, 4 ), 27 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 30 ); - EXPECT_EQ( m.getElement( 3, 1 ), 13 ); - EXPECT_EQ( m.getElement( 3, 2 ), 14 ); - EXPECT_EQ( m.getElement( 3, 3 ), 0 ); - EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 0 ); - EXPECT_EQ( m.getElement( 4, 1 ), 35 ); - EXPECT_EQ( m.getElement( 4, 2 ), 16 ); - EXPECT_EQ( m.getElement( 4, 3 ), 17 ); - EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 0 ); - EXPECT_EQ( m.getElement( 5, 1 ), 0 ); - EXPECT_EQ( m.getElement( 5, 2 ), 18 ); - EXPECT_EQ( m.getElement( 5, 3 ), 41 ); - EXPECT_EQ( m.getElement( 5, 4 ), 20 ); + test_AddElement< MatrixType >(); } -template< typename Matrix > -void test_VectorProduct() +TYPED_TEST( MatrixTest, vectorProductTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - - /* - * Sets up the following 4x4 sparse matrix: - * - * / 1 0 0 0 \ - * | 0 2 0 3 | - * | 0 4 0 0 | - * \ 0 0 5 0 / - */ - - const IndexType m_rows_1 = 4; - const IndexType m_cols_1 = 4; - - Matrix m_1; - m_1.reset(); - m_1.setDimensions( m_rows_1, m_cols_1 ); - typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 }; - m_1.setCompressedRowLengths( rowLengths_1 ); - - RealType value_1 = 1; - m_1.setElement( 0, 0, value_1++ ); // 0th row - - m_1.setElement( 1, 
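The addElement calls above take a fourth argument, the multiplying factor applied to the existing element, so each call stores factor * old + value. That is consistent with the expected values checked afterwards: element (0,0) goes from 1 to 2*1 + 1 = 3, and element (3,0) from 10 to 2*10 + 10 = 30. A compact sketch of that contract as a standalone check (the helper name is made up for illustration):

// Sketch: addElement( row, col, value, factor ) is expected to store factor * old + value.
template< typename Matrix >
void expectAddElementSemantics( Matrix& m,
                                typename Matrix::IndexType row,
                                typename Matrix::IndexType col,
                                typename Matrix::RealType value,
                                typename Matrix::RealType factor )
{
   const auto old = m.getElement( row, col );
   m.addElement( row, col, value, factor );
   EXPECT_EQ( m.getElement( row, col ), factor * old + value );
}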
1, value_1++ ); // 1st row - m_1.setElement( 1, 3, value_1++ ); - - m_1.setElement( 2, 1, value_1++ ); // 2nd row - - m_1.setElement( 3, 2, value_1++ ); // 3rd row + using MatrixType = typename TestFixture::MatrixType; - VectorType inVector_1; - inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) - inVector_1.setElement( i, 2 ); - - VectorType outVector_1; - outVector_1.setSize( m_rows_1 ); - for( IndexType j = 0; j < outVector_1.getSize(); j++ ) - outVector_1.setElement( j, 0 ); - - m_1.vectorProduct( inVector_1, outVector_1 ); - - EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); - EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); - EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); - EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); - - /* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * \ 0 8 0 0 / - */ - - const IndexType m_rows_2 = 4; - const IndexType m_cols_2 = 4; - - Matrix m_2( m_rows_2, m_cols_2 ); - typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; - m_2.setCompressedRowLengths( rowLengths_2 ); - - RealType value_2 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_2.setElement( 0, i, value_2++ ); - - m_2.setElement( 1, 3, value_2++ ); // 1st row - - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_2.setElement( 2, i, value_2++ ); - - for( IndexType i = 1; i < 2; i++ ) // 3rd row - m_2.setElement( 3, i, value_2++ ); - - VectorType inVector_2; - inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i < inVector_2.getSize(); i++ ) - inVector_2.setElement( i, 2 ); - - VectorType outVector_2; - outVector_2.setSize( m_rows_2 ); - for( IndexType j = 0; j < outVector_2.getSize(); j++ ) - outVector_2.setElement( j, 0 ); - - m_2.vectorProduct( inVector_2, outVector_2 ); - - EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); - EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); - EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); - - /* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 4 5 6 | - * | 7 8 9 0 | - * \ 0 10 11 12 / - */ - - const IndexType m_rows_3 = 4; - const IndexType m_cols_3 = 4; - - Matrix m_3( m_rows_3, m_cols_3 ); - typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; - m_3.setCompressedRowLengths( rowLengths_3 ); - - RealType value_3 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_3.setElement( 0, i, value_3++ ); - - for( IndexType i = 1; i < 4; i++ ) - m_3.setElement( 1, i, value_3++ ); // 1st row - - for( IndexType i = 0; i < 3; i++ ) // 2nd row - m_3.setElement( 2, i, value_3++ ); - - for( IndexType i = 1; i < 4; i++ ) // 3rd row - m_3.setElement( 3, i, value_3++ ); - - VectorType inVector_3; - inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) - inVector_3.setElement( i, 2 ); - - VectorType outVector_3; - outVector_3.setSize( m_rows_3 ); - for( IndexType j = 0; j < outVector_3.getSize(); j++ ) - outVector_3.setElement( j, 0 ); - - m_3.vectorProduct( inVector_3, outVector_3 ); - - EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); - EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); - EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); - EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 0 4 0 0 \ - * | 0 5 6 7 8 0 0 0 | - * | 9 10 11 12 13 0 0 0 | - * | 0 14 15 16 17 0 0 0 | - * | 0 0 18 19 20 21 0 0 | - * | 0 0 0 22 23 24 25 0 | - * | 26 27 28 29 30 0 0 0 | - * \ 31 32 33 34 35 0 0 0 / - */ - - 
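Every case in this vector-product test follows the same recipe: fill the input vector with 2, compute y = A*x with vectorProduct, and compare the output against row sums worked out by hand (for the first 4x4 matrix, row 1 holds 2 and 3, so y[1] = 2*2 + 3*2 = 10). A sketch of that recipe factored into a helper, assuming only interfaces already used in this file (scalar assignment to a vector mirrors the rowLengths = 0 idiom used elsewhere in these tests; the helper name is made up):

// Sketch: multiply by an all-twos vector and compare with expected per-row results.
// std::initializer_list comes from <initializer_list>.
template< typename Matrix >
void checkProductAgainstExpected( Matrix& m,
                                  const std::initializer_list< typename Matrix::RealType >& expected )
{
   using VectorType = TNL::Containers::Vector< typename Matrix::RealType,
                                                typename Matrix::DeviceType,
                                                typename Matrix::IndexType >;
   VectorType in( m.getColumns() ), out( m.getRows() );
   in = 2;    // every component of the input equals 2
   out = 0;
   m.vectorProduct( in, out );
   typename Matrix::IndexType i = 0;
   for( auto e : expected )
      EXPECT_EQ( out.getElement( i++ ), e );
}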
const IndexType m_rows_4 = 8; - const IndexType m_cols_4 = 8; - - Matrix m_4( m_rows_4, m_cols_4 ); - typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; - m_4.setCompressedRowLengths( rowLengths_4 ); - - RealType value_4 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_4.setElement( 0, i, value_4++ ); - - m_4.setElement( 0, 5, value_4++ ); - - for( IndexType i = 1; i < 5; i++ ) // 1st row - m_4.setElement( 1, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 2nd row - m_4.setElement( 2, i, value_4++ ); - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_4.setElement( 3, i, value_4++ ); - - for( IndexType i = 2; i < 6; i++ ) // 4th row - m_4.setElement( 4, i, value_4++ ); - - for( IndexType i = 3; i < 7; i++ ) // 5th row - m_4.setElement( 5, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 6th row - m_4.setElement( 6, i, value_4++ ); - - for( IndexType i = 0; i < 5; i++ ) // 7th row - m_4.setElement( 7, i, value_4++ ); - - VectorType inVector_4; - inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) - inVector_4.setElement( i, 2 ); - - VectorType outVector_4; - outVector_4.setSize( m_rows_4 ); - for( IndexType j = 0; j < outVector_4.getSize(); j++ ) - outVector_4.setElement( j, 0 ); - - m_4.vectorProduct( inVector_4, outVector_4 ); - - EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); - EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); - EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); - EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); - EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); - EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); - EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); - EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); - - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ - - const IndexType m_rows_5 = 8; - const IndexType m_cols_5 = 8; - - Matrix m_5( m_rows_5, m_cols_5 ); - typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m_5.setCompressedRowLengths( rowLengths_5 ); - - RealType value_5 = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m_5.setElement( 0, i, value_5++ ); - - m_5.setElement( 0, 4, value_5++ ); // 0th row - m_5.setElement( 0, 5, value_5++ ); - - m_5.setElement( 1, 1, value_5++ ); // 1st row - m_5.setElement( 1, 3, value_5++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m_5.setElement( 2, i, value_5++ ); - - m_5.setElement( 2, 4, value_5++ ); // 2nd row - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m_5.setElement( 3, i, value_5++ ); - - m_5.setElement( 4, 1, value_5++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m_5.setElement( 5, i, value_5++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m_5.setElement( 6, i, value_5++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m_5.setElement( 7, i, value_5++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m_5.setElement( i, 7, 1); - - VectorType inVector_5; - inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) - inVector_5.setElement( i, 2 ); - - VectorType outVector_5; - outVector_5.setSize( m_rows_5 ); - for( IndexType j = 0; j < outVector_5.getSize(); j++ ) - outVector_5.setElement( j, 0 ); - - m_5.vectorProduct( inVector_5, outVector_5 ); - - EXPECT_EQ( 
outVector_5.getElement( 0 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); - EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); - EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); - EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); - EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); - EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); - EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); -} - -template< typename Matrix > -void test_RowsReduction() -{ - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ - - const IndexType rows = 8; - const IndexType cols = 8; - - Matrix m; - m.setDimensions( rows, cols ); - typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m.setCompressedRowLengths( rowsCapacities ); - - RealType value = 1; - for( IndexType i = 0; i < 3; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - m.setElement( 0, 4, value++ ); // 0th row - m.setElement( 0, 5, value++ ); - - m.setElement( 1, 1, value++ ); // 1st row - m.setElement( 1, 3, value++ ); - - for( IndexType i = 1; i < 3; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - m.setElement( 2, 4, value++ ); // 2nd row - - for( IndexType i = 1; i < 5; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - m.setElement( 4, 1, value++ ); // 4th row - - for( IndexType i = 1; i < 7; i++ ) // 5th row - m.setElement( 5, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 6th row - m.setElement( 6, i, value++ ); - - for( IndexType i = 0; i < 8; i++ ) // 7th row - m.setElement( 7, i, value++ ); - - for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows - m.setElement( i, 7, 1); - - //// - // Compute number of non-zero elements in rows. 
- typename Matrix::RowsCapacitiesType rowLengths( rows ); - auto rowLengths_view = rowLengths.getView(); - auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { - return ( value != 0.0 ); - }; - auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowLengths_view[ rowIdx ] = value; - }; - m.allRowsReduction( fetch, reduce, keep, 0 ); - EXPECT_EQ( rowsCapacities, rowLengths ); - m.getCompressedRowLengths( rowLengths ); - EXPECT_EQ( rowsCapacities, rowLengths ); - - //// - // Compute max norm - TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); - auto rowSums_view = rowSums.getView(); - auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { - return abs( value ); - }; - auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { - aux += a; - }; - auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { - rowSums_view[ rowIdx ] = value; - }; - m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); - const RealType maxNorm = TNL::max( rowSums ); - EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 + test_VectorProduct< MatrixType >(); } -template< typename Matrix > -void test_PerformSORIteration() +TYPED_TEST( MatrixTest, rowsReduction ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 4x4 sparse matrix: - * - * / 4 1 0 0 \ - * | 1 4 1 0 | - * | 0 1 4 1 | - * \ 0 0 1 4 / - */ + using MatrixType = typename TestFixture::MatrixType; - const IndexType m_rows = 4; - const IndexType m_cols = 4; - - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - m.setCompressedRowLengths( rowLengths ); - - m.setElement( 0, 0, 4.0 ); // 0th row - m.setElement( 0, 1, 1.0); - - m.setElement( 1, 0, 1.0 ); // 1st row - m.setElement( 1, 1, 4.0 ); - m.setElement( 1, 2, 1.0 ); - - m.setElement( 2, 1, 1.0 ); // 2nd row - m.setElement( 2, 2, 4.0 ); - m.setElement( 2, 3, 1.0 ); - - m.setElement( 3, 2, 1.0 ); // 3rd row - m.setElement( 3, 3, 4.0 ); - - RealType bVector [ 4 ] = { 1, 1, 1, 1 }; - RealType xVector [ 4 ] = { 1, 1, 1, 1 }; - - IndexType row = 0; - RealType omega = 1; - - m.performSORIteration( bVector, row++, xVector, omega); - - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 1.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); - - m.performSORIteration( bVector, row++, xVector, omega); - - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 1.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); - - m.performSORIteration( bVector, row++, xVector, omega); - - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 1.0 ); - - m.performSORIteration( bVector, row++, xVector, omega); - - EXPECT_EQ( xVector[ 0 ], 0.0 ); - EXPECT_EQ( xVector[ 1 ], 0.0 ); - EXPECT_EQ( xVector[ 2 ], 0.0 ); - EXPECT_EQ( xVector[ 3 ], 0.25 ); + test_RowsReduction< MatrixType >(); } -template< typename Matrix > -void test_SaveAndLoad( const char* filename ) +TYPED_TEST( MatrixTest, saveAndLoadTest ) { - using RealType = typename Matrix::RealType; - using 
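With omega = 1 the SOR update reduces to a Gauss-Seidel step, x_i <- (1 - omega) * x_i + (omega / a_ii) * (b_i - sum over j != i of a_ij * x_j), which reproduces the expected values checked above: x_0 = (1 - 1*1)/4 = 0, then x_1 = (1 - 1*0 - 1*1)/4 = 0, x_2 = (1 - 1*0 - 1*1)/4 = 0, and finally x_3 = (1 - 1*0)/4 = 0.25. A plain reference implementation of one such row update on dense arrays, useful for cross-checking by hand (this function is illustrative, not part of the tested interface):

// Reference: one SOR update of a single row for a dense n x n matrix stored row-major.
// new x[row] = (1 - omega) * x[row] + omega / A[row][row] * ( b[row] - sum_{j != row} A[row][j] * x[j] )
inline double denseSORUpdate( const double* A, const double* b, double* x,
                              int n, int row, double omega )
{
   double sum = 0.0;
   for( int j = 0; j < n; j++ )
      if( j != row )
         sum += A[ row * n + j ] * x[ j ];
   x[ row ] = ( 1.0 - omega ) * x[ row ] + omega * ( b[ row ] - sum ) / A[ row * n + row ];
   return x[ row ];
}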
DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using MatrixType = typename TestFixture::MatrixType; - /* - * Sets up the following 4x4 sparse matrix: - * - * / 1 2 3 0 \ - * | 0 4 0 5 | - * | 6 7 8 0 | - * \ 0 9 10 11 / - */ - - const IndexType m_rows = 4; - const IndexType m_cols = 4; - - Matrix savedMatrix( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - savedMatrix.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - savedMatrix.setElement( 0, i, value++ ); - - savedMatrix.setElement( 1, 1, value++ ); - savedMatrix.setElement( 1, 3, value++ ); // 1st row - - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - savedMatrix.setElement( 2, i, value++ ); - - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - savedMatrix.setElement( 3, i, value++ ); - - ASSERT_NO_THROW( savedMatrix.save( filename ) ); - - Matrix loadedMatrix; - - ASSERT_NO_THROW( loadedMatrix.load( filename ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - - EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); - EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); - EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); - EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); - - EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); - EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); - - EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); - EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); - EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); - EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - - EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); - EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); - EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); - EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); - - EXPECT_EQ( std::remove( filename ), 0 ); + test_SaveAndLoad< MatrixType >( saveAndLoadFileName ); } -template< typename Matrix > -void test_Print() +TYPED_TEST( MatrixTest, printTest ) { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; - - /* - * Sets up the following 5x4 sparse matrix: - * - * / 1 2 3 
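The save-and-load test above follows a simple round-trip pattern: write the matrix to a file, read it back into a fresh object, compare every element, and finally remove the temporary file. A sketch of that pattern as a reusable helper (the helper name is made up; it relies only on save, load, getElement and std::remove, which the test already uses):

// Sketch: save/load round trip for any matrix type providing save(), load() and getElement().
// std::remove comes from <cstdio>.
template< typename Matrix >
void expectSaveLoadRoundTrip( Matrix& original, const char* filename )
{
   ASSERT_NO_THROW( original.save( filename ) );
   Matrix restored;
   ASSERT_NO_THROW( restored.load( filename ) );
   for( typename Matrix::IndexType i = 0; i < original.getRows(); i++ )
      for( typename Matrix::IndexType j = 0; j < original.getColumns(); j++ )
         EXPECT_EQ( restored.getElement( i, j ), original.getElement( i, j ) );
   EXPECT_EQ( std::remove( filename ), 0 );   // clean up the temporary file
}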
0 \ - * | 0 0 0 4 | - * | 5 6 7 0 | - * | 0 8 9 10 | - * \ 0 0 11 12 / - */ - - const IndexType m_rows = 5; - const IndexType m_cols = 4; - - Matrix m( m_rows, m_cols ); - typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - m.setCompressedRowLengths( rowLengths ); - - RealType value = 1; - for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row - m.setElement( 0, i, value++ ); - - m.setElement( 1, 3, value++ ); // 1st row - - for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row - m.setElement( 2, i, value++ ); - - for( IndexType i = 1; i < m_cols; i++ ) // 3rd row - m.setElement( 3, i, value++ ); - - for( IndexType i = 2; i < m_cols; i++ ) // 4th row - m.setElement( 4, i, value++ ); - - std::stringstream printed; - std::stringstream couted; - - //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); - - m.print( std::cout ); //all the std::cout goes to ss - - std::cout.rdbuf(old_buf); //reset - - couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" - "Row: 1 -> Col:3->4\t\n" - "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" - "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" - "Row: 4 -> Col:2->11 Col:3->12\t\n"; + using MatrixType = typename TestFixture::MatrixType; - EXPECT_EQ( printed.str(), couted.str() ); + test_Print< MatrixType >(); } -#endif +#endif \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8856547247adacff6923af9bd13c2d767c59e12 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -0,0 +1,1468 @@ +/*************************************************************************** + SparseMatrixTest.hpp - description + ------------------- + begin : Nov 22, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <iostream> +#include <sstream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_Constructors() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + Matrix m1( 5, 6 ); + EXPECT_EQ( m1.getRows(), 5 ); + EXPECT_EQ( m1.getColumns(), 6 ); + + Matrix m2( {1, 2, 2, 2, 1 }, 5 ); + typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; + m2.setElement( 0, 0, 1 ); // 0th row + m2.setElement( 1, 0, 1 ); // 1st row + m2.setElement( 1, 1, 1 ); + m2.setElement( 2, 1, 1 ); // 2nd row + m2.setElement( 2, 2, 1 ); + m2.setElement( 3, 2, 1 ); // 3rd row + m2.setElement( 3, 3, 1 ); + m2.setElement( 4, 4, 1 ); // 4th row + + EXPECT_EQ( m2.getElement( 0, 0 ), 1 ); // 0th row + EXPECT_EQ( m2.getElement( 1, 0 ), 1 ); // 1st row + EXPECT_EQ( m2.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m2.getElement( 2, 1 ), 1 ); // 2nd row + EXPECT_EQ( m2.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m2.getElement( 3, 2 ), 1 ); // 3rd row + EXPECT_EQ( m2.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m2.getElement( 4, 4 ), 1 ); // 4th row + + m2.getCompressedRowLengths( v1 ); + EXPECT_EQ( v1, v2 ); + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + + Matrix m3( 6, 5, { + { 0, 0, 1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 }, + { 1, 1, 4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 }, + { 2, 2, 7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 }, + { 3, 0, 10.0 }, + { 4, 1, 11.0 }, + { 5, 3, 12.0 } } ); + + // Check the set elements + EXPECT_EQ( m3.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m3.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m3.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m3.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m3.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m3.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m3.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m3.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m3.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m3.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m3.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m3.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m3.getElement( 5, 4 ), 0 ); + + std::map< std::pair< int, int >, float > map; + map[ { 0, 0 } ] = 1.0; + map[ { 0, 1 } ] = 2.0; + map[ { 0, 2 } ] = 3.0; + map[ { 1, 1 } ] = 4.0; + map[ { 1, 2 } ] = 5.0; + map[ { 1, 3 } ] = 6.0; + map[ { 2, 2 } ] = 7.0; + map[ { 2, 3 } ] = 8.0; + map[ { 2, 4 } ] = 9.0; + map[ { 3, 0 } ] = 10.0; + map[ { 4, 1 } ] = 11.0; + map[ { 5, 3 } ] = 12.0; + Matrix m4( 6, 5, map ); + + // Check the matrix elements + EXPECT_EQ( m4.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m4.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m4.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m4.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m4.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m4.getElement( 1, 0 ), 0 ); + 
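m3 and m4 above are built from the same twelve entries, once as an initializer list of (row, column, value) triplets and once as a std::map keyed by (row, column) pairs, so the two constructors should yield identical matrices. A small sketch that states this equivalence directly instead of repeating the per-element expectations (the helper name is hypothetical):

// Sketch: element-wise comparison of two matrices of the same type.
template< typename Matrix >
void expectSameElements( Matrix& a, Matrix& b )
{
   ASSERT_EQ( a.getRows(), b.getRows() );
   ASSERT_EQ( a.getColumns(), b.getColumns() );
   for( typename Matrix::IndexType i = 0; i < a.getRows(); i++ )
      for( typename Matrix::IndexType j = 0; j < a.getColumns(); j++ )
         EXPECT_EQ( a.getElement( i, j ), b.getElement( i, j ) );
}

// e.g. expectSameElements( m3, m4 );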
EXPECT_EQ( m4.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m4.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m4.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m4.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m4.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m4.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m4.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m4.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m4.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m4.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m4.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m4.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m4.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m4.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m4.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m4.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m4.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m4.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m4.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m4.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m4.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m4.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m4.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m4.getElement( 5, 4 ), 0 ); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); + + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); + + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. 
+ RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename 
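The nonzero-count test above inserts the consecutive values 1 through 41, so getNumberOfNonzeroMatrixElements() is expected to report exactly 41 stored elements. Since every stored value in that matrix is nonzero, the count can also be cross-checked by brute force over getElement; a sketch of such a recount (valid only for matrices whose stored values are all nonzero, as here; the helper name is made up):

// Sketch: recount nonzero elements by scanning every position.
// Only meaningful when no stored element happens to be zero.
template< typename Matrix >
typename Matrix::IndexType countNonzeroesByScanning( Matrix& m )
{
   typename Matrix::IndexType count = 0;
   for( typename Matrix::IndexType i = 0; i < m.getRows(); i++ )
      for( typename Matrix::IndexType j = 0; j < m.getColumns(); j++ )
         if( m.getElement( i, j ) != 0 )
            count++;
   return count;
}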
Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: + val = 1; + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, val++ ); + break; + case 1: + val = 5; + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, val++ ); + break; + case 2: + val = 8; + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, val++ ); + break; + case 3: + val = 16; + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, val++ ); + break; + case 4: + row.setElement( 0, 0, 18 ); + break; + case 5: + row.setElement( 0, 0, 19 ); + break; + case 6: + row.setElement( 0, 0, 20 ); + break; + case 7: + row.setElement( 0, 0, 21 ); + break; + case 8: + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + case 9: + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( 
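test_GetRow above fills the matrix on the device: it takes a view of the matrix, and a ParallelFor kernel obtains each row through getRow( rowIdx ) and writes elements via row.setElement( localIdx, column, value ). A stripped-down sketch of the same pattern, setting just the main diagonal (the helper name and the chosen value are illustrative; it assumes every row has capacity for at least one element):

// Sketch: set the diagonal of a sparse matrix from a device kernel through its view.
template< typename Matrix >
void setDiagonalViaView( Matrix& m, typename Matrix::RealType diagonalValue )
{
   using IndexType = typename Matrix::IndexType;
   auto view = m.getView();
   auto kernel = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
      auto row = view.getRow( rowIdx );
      row.setElement( 0, rowIdx, diagonalValue );   // local index 0, column equal to the row index
   };
   TNL::Algorithms::ParallelFor< typename Matrix::DeviceType >::exec( ( IndexType ) 0, m.getRows(), kernel );
}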
m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i 
< cols; i++ ) + m.setElement( j, i, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 
), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 1 1 0 0 | + * | 0 11 1 1 0 | + * \ 0 0 1 12 1 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, + { 1, 1, 4 }, { 1, 2, 5 }, { 1, 3, 6 }, + { 2, 2, 7 }, { 2, 3, 8 }, { 2, 4, 9 }, + { 3, 0, 10 }, { 3, 1, 1 }, { 3, 2, 1 }, + { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3, 1 }, + { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } ); + /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 3, 0, value++ ); // 3rd row + + m.setElement( 4, 1, value++ ); // 4th row + + m.setElement( 5, 3, value++ ); // 5th row*/ + + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 1 ); + EXPECT_EQ( m.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m.getElement( 5, 4 ), 1 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 0 0 \ + * | 0 12 15 18 0 | + * | 0 0 21 24 27 | + * | 30 13 14 0 0 | + * | 0 35 16 17 0 | + * \ 0 0 18 41 20 / + */ + + RealType newValue = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.addElement( 0, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.addElement( 1, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.addElement( 2, i, newValue++, 2.0 ); + + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row + m.addElement( 3, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row + m.addElement( 4, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 5th row + m.addElement( 5, i, newValue++, 2.0 ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 12 ); + EXPECT_EQ( m.getElement( 1, 2 ), 15 ); + EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 21 ); + EXPECT_EQ( m.getElement( 2, 3 ), 24 ); + EXPECT_EQ( m.getElement( 2, 4 ), 27 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); + EXPECT_EQ( m.getElement( 3, 1 ), 13 ); + EXPECT_EQ( m.getElement( 3, 2 ), 14 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 35 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 18 ); + EXPECT_EQ( m.getElement( 5, 3 ), 41 ); + EXPECT_EQ( m.getElement( 5, 4 ), 20 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 }; + m_1.setCompressedRowLengths( rowLengths_1 ); + + RealType value_1 = 1; + m_1.setElement( 0, 0, value_1++ ); // 0th row + + m_1.setElement( 1, 1, value_1++ ); // 1st row + m_1.setElement( 1, 3, value_1++ ); + + m_1.setElement( 2, 1, value_1++ ); // 2nd row + + m_1.setElement( 3, 2, value_1++ ); // 3rd row + + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); + + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); + + m_1.vectorProduct( inVector_1, outVector_1 ); + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + + /* + * Sets up the following 
4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; + m_2.setCompressedRowLengths( rowLengths_2 ); + + RealType value_2 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, value_2++ ); + + m_2.setElement( 1, 3, value_2++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, value_2++ ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, value_2++ ); + + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); + + VectorType outVector_2; + outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; + m_3.setCompressedRowLengths( rowLengths_3 ); + + RealType value_3 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, value_3++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, value_3++ ); + + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); + + m_3.vectorProduct( inVector_3, outVector_3 ); + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | + * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 34 35 0 0 0 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; + m_4.setCompressedRowLengths( rowLengths_4 ); + + RealType value_4 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, value_4++ ); + + m_4.setElement( 0, 5, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, value_4++ ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, value_4++ ); + + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 
5, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, value_4++ ); + + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); + + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + + m_4.vectorProduct( inVector_4, outVector_4 ); + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, value_5++ ); + + m_5.setElement( 0, 4, value_5++ ); // 0th row + m_5.setElement( 0, 5, value_5++ ); + + m_5.setElement( 1, 1, value_5++ ); // 1st row + m_5.setElement( 1, 3, value_5++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, value_5++ ); + + m_5.setElement( 2, 4, value_5++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, value_5++ ); + + m_5.setElement( 4, 1, value_5++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, value_5++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5; + inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 
21 1 | 7
+    * | 22 23 24 25 26 27 28 1 | 8
+    * \ 29 30 31 32 33 34 35 36 / 8
+    */
+
+   const IndexType rows = 8;
+   const IndexType cols = 8;
+
+   Matrix m;
+   m.setDimensions( rows, cols );
+   typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 };
+   m.setCompressedRowLengths( rowsCapacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < 3; i++ )   // 0th row
+      m.setElement( 0, i, value++ );
+
+   m.setElement( 0, 4, value++ );   // 0th row
+   m.setElement( 0, 5, value++ );
+
+   m.setElement( 1, 1, value++ );   // 1st row
+   m.setElement( 1, 3, value++ );
+
+   for( IndexType i = 1; i < 3; i++ )   // 2nd row
+      m.setElement( 2, i, value++ );
+
+   m.setElement( 2, 4, value++ );   // 2nd row
+
+   for( IndexType i = 1; i < 5; i++ )   // 3rd row
+      m.setElement( 3, i, value++ );
+
+   m.setElement( 4, 1, value++ );   // 4th row
+
+   for( IndexType i = 1; i < 7; i++ )   // 5th row
+      m.setElement( 5, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 6th row
+      m.setElement( 6, i, value++ );
+
+   for( IndexType i = 0; i < 8; i++ )   // 7th row
+      m.setElement( 7, i, value++ );
+
+   for( IndexType i = 0; i < 7; i++ )   // 1s at the end of rows
+      m.setElement( i, 7, 1 );
+
+   ////
+   // Compute the number of non-zero elements in each row.
+   typename Matrix::RowsCapacitiesType rowLengths( rows );
+   auto rowLengths_view = rowLengths.getView();
+   auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType {
+      return ( value != 0.0 );
+   };
+   auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) {
+      aux += a;
+   };
+   auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable {
+      rowLengths_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( fetch, reduce, keep, 0 );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+   m.getCompressedRowLengths( rowLengths );
+   EXPECT_EQ( rowsCapacities, rowLengths );
+
+   ////
+   // Compute the max norm, i.e. the maximal absolute row sum of the matrix.
+   TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows );
+   auto rowSums_view = rowSums.getView();
+   auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> RealType {
+      return abs( value );
+   };
+   auto max_reduce = [] __cuda_callable__ ( RealType& aux, const RealType a ) {
+      aux += a;
+   };
+   auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const RealType value ) mutable {
+      rowSums_view[ rowIdx ] = value;
+   };
+   m.allRowsReduction( max_fetch, max_reduce, max_keep, 0.0 );
+   const RealType maxNorm = TNL::max( rowSums );
+   EXPECT_EQ( maxNorm, 260 );   // 29+30+31+32+33+34+35+36
+}
+
+template< typename Matrix >
+void test_PerformSORIteration()
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    * / 4 1 0 0 \
+    * | 1 4 1 0 |
+    * | 0 1 4 1 |
+    * \ 0 0 1 4 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix m( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   m.setCompressedRowLengths( rowLengths );
+
+   m.setElement( 0, 0, 4.0 );   // 0th row
+   m.setElement( 0, 1, 1.0 );
+
+   m.setElement( 1, 0, 1.0 );   // 1st row
+   m.setElement( 1, 1, 4.0 );
+   m.setElement( 1, 2, 1.0 );
+
+   m.setElement( 2, 1, 1.0 );   // 2nd row
+   m.setElement( 2, 2, 4.0 );
+   m.setElement( 2, 3, 1.0 );
+
+   m.setElement( 3, 2, 1.0 );   // 3rd row
+   m.setElement( 3, 3, 4.0 );
+
+   RealType bVector [ 4 ] = { 1, 1, 1, 1 };
+   RealType xVector [ 4 ] = { 1, 1, 1, 1 };
+
+   IndexType row = 0;
+   RealType omega = 1;
+
+   m.performSORIteration( bVector, row++, xVector, omega );
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 1.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega );
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 1.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega );
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 1.0 );
+
+   m.performSORIteration( bVector, row++, xVector, omega );
+
+   EXPECT_EQ( xVector[ 0 ], 0.0 );
+   EXPECT_EQ( xVector[ 1 ], 0.0 );
+   EXPECT_EQ( xVector[ 2 ], 0.0 );
+   EXPECT_EQ( xVector[ 3 ], 0.25 );
+}
+
+template< typename Matrix >
+void test_SaveAndLoad( const char* filename )
+{
+   using RealType = typename Matrix::RealType;
+   using DeviceType = typename Matrix::DeviceType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 4x4 sparse matrix:
+    *
+    * / 1 2 3 0 \
+    * | 0 4 0 5 |
+    * | 6 7 8 0 |
+    * \ 0 9 10 11 /
+    */
+
+   const IndexType m_rows = 4;
+   const IndexType m_cols = 4;
+
+   Matrix savedMatrix( m_rows, m_cols );
+   typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 );
+   savedMatrix.setCompressedRowLengths( rowLengths );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 0th row
+      savedMatrix.setElement( 0, i, value++ );
+
+   savedMatrix.setElement( 1, 1, value++ );   // 1st row
+   savedMatrix.setElement( 1, 3, value++ );
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )   // 2nd row
+      savedMatrix.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )   // 3rd row
+      savedMatrix.setElement( 3, i, value++ );
+
+   ASSERT_NO_THROW( savedMatrix.save( filename ) );
+
+   Matrix loadedMatrix;
+
+   ASSERT_NO_THROW( loadedMatrix.load( filename ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) );
+   EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) );
+
+   EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 );
+   EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 );
+
+   EXPECT_EQ( savedMatrix.getElement( 1,
0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * | 0 8 9 10 | + * \ 0 0 11 12 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" + "Row: 1 -> Col:3->4\t\n" + "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" + "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" + "Row: 4 -> Col:2->11 Col:3->12\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index a72d548f5bdc98c6fbd7920507b4c1978f58ef00..2898a46f1a30a097cefd0a1e0487e68382e1b150 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -8,26 +8,17 @@ /* See Copyright Notice in tnl/Copyright */ +#include <iostream> #include <TNL/Containers/Segments/CSR.h> #include <TNL/Matrices/SparseMatrix.h> - -#include "SparseMatrixTest.h" -#include <iostream> - #ifdef HAVE_GTEST #include <gtest/gtest.h> -// test fixture for typed tests -template< typename Matrix > -class CSRMatrixTest : public ::testing::Test -{ -protected: - using CSRMatrixType = Matrix; -}; +const char* saveAndLoadFileName = "test_SparseMatrixTest_CSR_segments"; // types for which MatrixTest is instantiated -using CSRMatrixTypes = ::testing::Types +using MatrixTypes = ::testing::Types < TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, @@ -49,93 +40,7 @@ using CSRMatrixTypes = ::testing::Types #endif >; -TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); - -TYPED_TEST( CSRMatrixTest, Constructors ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_Constructors< 
CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, setDimensionsTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SetDimensions< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SetCompressedRowLengths< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, setLikeTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SetLike< CSRMatrixType, CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, resetTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_Reset< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, getRowTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_GetRow< CSRMatrixType >(); -} - - -TYPED_TEST( CSRMatrixTest, setElementTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SetElement< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, addElementTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_AddElement< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, vectorProductTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_VectorProduct< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, rowsReduction ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_RowsReduction< CSRMatrixType >(); -} - -TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" ); -} - -TYPED_TEST( CSRMatrixTest, printTest ) -{ - using CSRMatrixType = typename TestFixture::CSRMatrixType; - - test_Print< CSRMatrixType >(); -} - #endif +#include "SparseMatrixTest.h" #include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..667b06513fc9df997e61b3a0491ecbf17d72f31c --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_ChunkedEllpack.cpp - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixTest_ChunkedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..6310042471caa4c2a6852ac42a875da9423b08f8 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_ChunkedEllpack.cu - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixTest_ChunkedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c35dac6e43755e83699e627c91d2c798a73b69 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h @@ -0,0 +1,57 @@ +/*************************************************************************** + SparseMatrixTest_ChunkedEllpack.h - description + ------------------- + begin : Mar 21, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <iostream> +#include <TNL/Containers/Segments/ChunkedEllpack.h> +#include <TNL/Matrices/SparseMatrix.h> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +const char* saveAndLoadFileName = "test_SparseMatrixTest_ChunkedEllpack_segments"; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, true >; + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, false >; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorChunkedEllpack > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, 
ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorChunkedEllpack > +#endif +>; + +#endif + +#include "SparseMatrixTest.h" +#include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 2a890e694f4ca90edc7aa3b98fba56f666c2097d..aed4d1ac16b7ebf6c2356d10305df08a5ff92340 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -8,37 +8,27 @@ /* See Copyright Notice in tnl/Copyright */ +#include <iostream> #include <TNL/Containers/Segments/Ellpack.h> #include <TNL/Matrices/SparseMatrix.h> - -#include "SparseMatrixTest.h" -#include <iostream> - #ifdef HAVE_GTEST #include <gtest/gtest.h> -// test fixture for typed tests -template< typename Matrix > -class EllpackMatrixTest : public ::testing::Test -{ -protected: - using EllpackMatrixType = Matrix; -}; +const char* saveAndLoadFileName = "test_SparseMatrixTest_Ellpack_segments"; //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAlocator > using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; - //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated -using EllpackMatrixTypes = ::testing::Types +using MatrixTypes = ::testing::Types < TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, @@ -60,99 +50,6 @@ using EllpackMatrixTypes = ::testing::Types #endif >; -TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes); - -TYPED_TEST( EllpackMatrixTest, Constructors ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_Constructors< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_SetDimensions< EllpackMatrixType >(); -} - -//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest ) -//{ -//// using EllpackMatrixType = typename TestFixture::EllpackMatrixType; -// -//// test_SetCompressedRowLengths< EllpackMatrixType >(); -// -// bool testRan = false; -// EXPECT_TRUE( testRan ); -// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -// std::cout << " This test is dependent on the input format. 
\n"; -// std::cout << " Almost every format allocates elements per row differently.\n\n"; -// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; -//} - -TYPED_TEST( EllpackMatrixTest, setLikeTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_SetLike< EllpackMatrixType, EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, resetTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_Reset< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, getRowTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_GetRow< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, setElementTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_SetElement< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, addElementTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_AddElement< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, vectorProductTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_VectorProduct< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, rowsReduction ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_RowsReduction< EllpackMatrixType >(); -} - -TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" ); -} - -TYPED_TEST( EllpackMatrixTest, printTest ) -{ - using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - - test_Print< EllpackMatrixType >(); -} - #endif #include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 17b48dcf461e4b8e99a9d1d9172ded8301b20038..96115a25c800e6742ba83359e0c3b818ba78b41b 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -8,38 +8,28 @@ /* See Copyright Notice in tnl/Copyright */ +#include <iostream> #include <TNL/Containers/Segments/SlicedEllpack.h> #include <TNL/Matrices/SparseMatrix.h> #include <TNL/Matrices/MatrixType.h> - -#include "SparseMatrixTest.h" -#include <iostream> - #ifdef HAVE_GTEST #include <gtest/gtest.h> -// test fixture for typed tests -template< typename Matrix > -class SlicedEllpackMatrixTest : public ::testing::Test -{ -protected: - using SlicedEllpackMatrixType = Matrix; -}; +const char* saveAndLoadFileName = "test_SparseMatrixTest_SlicedEllpack_segments"; //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAllocator > using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; - //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; // types for which MatrixTest is instantiated -using SlicedEllpackMatrixTypes = ::testing::Types +using MatrixTypes = ::testing::Types < TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, 
TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, @@ -61,92 +51,6 @@ using SlicedEllpackMatrixTypes = ::testing::Types #endif >; -TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); - -TYPED_TEST( SlicedEllpackMatrixTest, Constructors ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_Constructors< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SetDimensions< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, resetTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_Reset< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, getRowTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_GetRow< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, setElementTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SetElement< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, addElementTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_AddElement< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_VectorProduct< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_RowsReduction< SlicedEllpackMatrixType >(); -} - -TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" ); -} - -TYPED_TEST( SlicedEllpackMatrixTest, printTest ) -{ - using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - - test_Print< SlicedEllpackMatrixType >(); -} - #endif #include "../main.h"
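
The new test_RowsReduction above drives SparseMatrix::allRowsReduction with a fetch/reduce/keep triple of lambdas. As a stand-alone illustration (not part of the diff), the sketch below applies the same pattern to compute the absolute row sums of a host-side matrix; the header paths, getRows() and TNL::abs are assumptions based on common TNL usage mirrored from the tests above, not something this patch introduces.

// Illustrative sketch only: row reduction with the fetch/reduce/keep pattern
// exercised by test_RowsReduction. Assumes the allRowsReduction interface shown
// in the tests above and the usual TNL headers.
#include <TNL/Matrices/SparseMatrix.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Math.h>

using SketchMatrix = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;
using SketchVector = TNL::Containers::Vector< double, TNL::Devices::Host, int >;

SketchVector absoluteRowSums( SketchMatrix& matrix )
{
   SketchVector rowSums( matrix.getRows() );
   auto rowSums_view = rowSums.getView();
   // fetch: contribution of one stored element (its absolute value)
   auto fetch = [] __cuda_callable__ ( int row, int column, int globalIdx, const double& value ) -> double {
      return TNL::abs( value );
   };
   // reduce: accumulate the contributions within one row
   auto reduce = [] __cuda_callable__ ( double& aux, const double a ) {
      aux += a;
   };
   // keep: store the reduced value of each row into the output vector
   auto keep = [=] __cuda_callable__ ( const int rowIdx, const double value ) mutable {
      rowSums_view[ rowIdx ] = value;
   };
   matrix.allRowsReduction( fetch, reduce, keep, 0.0 );   // 0.0 is the reduction identity
   return rowSums;
}

Taking the maximum of the returned vector, as the test does with TNL::max( rowSums ), then yields the max (infinity) norm of the matrix.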