diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ed064eb1de0ebce012daf328173b420ada0f0a4..fc377eee6cdbe9dfbbf7710ebfc4038a8d08417a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -409,10 +409,6 @@ endif() # endif() #endif() -if( OPTIMIZED_VECTOR_HOST_OPERATIONS STREQUAL "yes" ) - AddCompilerFlag( "-DOPTIMIZED_VECTOR_HOST_OPERATIONS " ) -endif() - CONFIGURE_FILE( "tnlConfig.h.in" "${PROJECT_BUILD_PATH}/TNL/tnlConfig.h" ) INSTALL( FILES ${PROJECT_BUILD_PATH}/TNL/tnlConfig.h DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY} ) diff --git a/build b/build index e0c8dbb993e592c420aa62abd991c189fbff4870..3e2983dcee398c1af61f36ab34166bbf71f11620 100755 --- a/build +++ b/build @@ -34,7 +34,6 @@ INSTANTIATE_INT="yes" INSTANTIATE_LONG_DOUBLE="no" INSTANTIATE_DOUBLE="yes" INSTANTIATE_FLOAT="no" -OPTIMIZED_VECTOR_HOST_OPERATIONS="no" for option in "$@" do @@ -75,7 +74,6 @@ do INSTANTIATE_DOUBLE="yes" INSTANTIATE_FLOAT="no" WITH_CUDA_ARCH="auto" ;; - --optimize-vector-host-operations=* ) OPTIMIZED_VECTOR_HOST_OPERATIONS="yes" ;; * ) echo "Unknown option ${option}. Use --help for more information." exit 1 ;; @@ -175,7 +173,6 @@ cmake_command=( -DINSTANTIATE_LONG_DOUBLE=${INSTANTIATE_LONG_DOUBLE} -DINSTANTIATE_INT=${INSTANTIATE_INT} -DINSTANTIATE_LONG_INT=${INSTANTIATE_LONG_INT} - -DOPTIMIZED_VECTOR_HOST_OPERATIONS=${OPTIMIZED_VECTOR_HOST_OPERATIONS} ) # Skip running cmake if it was already run and the cmake command is the same. 
diff --git a/src/Benchmarks/BLAS/vector-operations.h b/src/Benchmarks/BLAS/vector-operations.h index e65f8980b1066e042206e328d15b50e32c81432f..e5b6f5aee648ac89f1ec44656a3ce67b8043c24d 100644 --- a/src/Benchmarks/BLAS/vector-operations.h +++ b/src/Benchmarks/BLAS/vector-operations.h @@ -87,23 +87,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto maxHost = [&]() { resultHost = hostVector.max(); }; - auto maxHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionMax< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto maxCuda = [&]() { resultDevice = deviceVector.max(); }; benchmark.setOperation( "max", datasetSize ); benchmark.time( reset1, "CPU", maxHost ); - benchmark.time( reset1, "CPU (general)", maxHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", maxCuda ); #endif @@ -112,23 +100,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto minHost = [&]() { resultHost = hostVector.min(); }; - auto minHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionMin< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto minCuda = [&]() { resultDevice = deviceVector.min(); }; benchmark.setOperation( "min", datasetSize ); benchmark.time( reset1, "CPU", minHost ); - benchmark.time( reset1, "CPU (general)", minHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", minCuda ); #endif @@ -137,17 +113,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMaxHost = [&]() { resultHost = hostVector.absMax(); }; - auto absMaxHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionAbsMax< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - 
operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); }; @@ -162,7 +127,6 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "absMax", datasetSize ); benchmark.time( reset1, "CPU", absMaxHost ); - benchmark.time( reset1, "CPU (general)", absMaxHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", absMaxCuda ); benchmark.time( reset1, "cuBLAS", absMaxCublas ); @@ -172,17 +136,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto absMinHost = [&]() { resultHost = hostVector.absMin(); }; - auto absMinHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionAbsMin< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; @@ -197,7 +150,6 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "absMin", datasetSize ); benchmark.time( reset1, "CPU", absMinHost ); - benchmark.time( reset1, "CPU (general)", absMinHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", absMinCuda ); benchmark.time( reset1, "cuBLAS", absMinCublas ); @@ -207,23 +159,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto sumHost = [&]() { resultHost = hostVector.sum(); }; - auto sumHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionSum< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto sumCuda = [&]() { resultDevice = deviceVector.sum(); }; benchmark.setOperation( "sum", datasetSize ); benchmark.time( reset1, "CPU", sumHost ); - benchmark.time( reset1, "CPU (general)", sumHostGeneral ); 
#ifdef HAVE_CUDA benchmark.time( reset1, "GPU", sumCuda ); #endif @@ -232,17 +172,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l1normHost = [&]() { resultHost = hostVector.lpNorm( 1.0 ); }; - auto l1normHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionAbsSum< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; @@ -255,7 +184,6 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l1 norm", datasetSize ); benchmark.time( reset1, "CPU", l1normHost ); - benchmark.time( reset1, "CPU (general)", l1normHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", l1normCuda ); benchmark.time( reset1, "cuBLAS", l1normCublas ); @@ -265,17 +193,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; - auto l2normHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionL2Norm< Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; @@ -288,7 +205,6 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "l2 norm", datasetSize ); benchmark.time( reset1, "CPU", l2normHost ); - benchmark.time( reset1, "CPU (general)", l2normHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", l2normCuda ); benchmark.time( reset1, "cuBLAS", l2normCublas ); @@ -298,24 +214,11 @@ benchmarkVectorOperations( Benchmark & benchmark, auto l3normHost = [&]() { resultHost = hostVector.lpNorm( 3.0 ); }; - auto l3normHostGeneral = [&]() { - Real result( 0 ); - 
Containers::Algorithms::ParallelReductionLpNorm< Real > operation; - operation.setPower( 3.0 ); - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - ( Real* ) 0, - result ); - return result; - }; auto l3normCuda = [&]() { resultDevice = deviceVector.lpNorm( 3.0 ); }; benchmark.setOperation( "l3 norm", datasetSize ); benchmark.time( reset1, "CPU", l3normHost ); - benchmark.time( reset1, "CPU (general)", l3normHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", l3normCuda ); #endif @@ -324,17 +227,6 @@ benchmarkVectorOperations( Benchmark & benchmark, auto scalarProductHost = [&]() { resultHost = hostVector.scalarProduct( hostVector2 ); }; - auto scalarProductHostGeneral = [&]() { - Real result( 0 ); - Containers::Algorithms::ParallelReductionScalarProduct< Real, Real > operation; - Containers::Algorithms::Reduction< Devices::Host >::reduce( - operation, - hostVector.getSize(), - hostVector.getData(), - hostVector2.getData(), - result ); - return result; - }; auto scalarProductCuda = [&]() { resultDevice = deviceVector.scalarProduct( deviceVector2 ); }; @@ -348,7 +240,6 @@ benchmarkVectorOperations( Benchmark & benchmark, #endif benchmark.setOperation( "scalar product", 2 * datasetSize ); benchmark.time( reset1, "CPU", scalarProductHost ); - benchmark.time( reset1, "CPU (general)", scalarProductHostGeneral ); #ifdef HAVE_CUDA benchmark.time( reset1, "GPU", scalarProductCuda ); benchmark.time( reset1, "cuBLAS", scalarProductCublas ); diff --git a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h index 6a29ec9380f0d5b04d31f42b431ffb93d66bdd68..e63e431c67a39eed8a06993f71605db48d8e39c5 100644 --- a/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +++ b/src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h @@ -514,7 +514,7 @@ getExplicitUpdate( const RealType& time, cell.getBasis(), 
gridXIdx, gridYIdx ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; //std::cerr << "Computing the heat equation ..." << std::endl; @@ -534,7 +534,7 @@ getExplicitUpdate( const RealType& time, cell.getBasis(), gridXIdx, gridYIdx ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; } if( this->cudaKernelType == "templated" ) diff --git a/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h index 4d8af10804463cfec56226ec56198fb456d69177..4b546be64fdd4f2b225bf5a358d13c5e851a7d89 100644 --- a/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h +++ b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation-bug.h @@ -47,7 +47,7 @@ int main( int argc, char* argv[] ) while( iteration < 10000 ) { testKernel< GridEntity ><<< cudaGridSize, cudaBlockSize >>>(); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); iteration++; } auto t_stop = std::chrono::high_resolution_clock::now(); diff --git a/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h index e8798609898c03831a165b9173dccadffdd4a1ba..33dff1ded6f5968f812236d2dd65b23a2f1c2fc9 100644 --- a/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h +++ b/src/Benchmarks/HeatEquation/tnl-benchmark-simple-heat-equation.h @@ -379,7 +379,7 @@ bool solveHeatEquationCuda( const Config::ParameterContainer& parameters, return false; } - cudaThreadSynchronize(); + cudaDeviceSynchronize(); cudaMemcpy( max_du, cuda_max_du, cudaUpdateBlocks.x * sizeof( Real ), cudaMemcpyDeviceToHost ); if( ( cudaErr = cudaGetLastError() ) != cudaSuccess ) { diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index d5ad82ecbc2c5b6709b15fd38ae7d1465e919ee6..ad3c8d61c0ef09f084bf4b5ba392224ab2786ea3 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ 
b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -250,7 +250,7 @@ double benchmarkMatrix( const Matrix& matrix, matrix.vectorProduct( x, b ); #ifdef HAVE_CUDA if( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value ) - cudaThreadSynchronize(); + cudaDeviceSynchronize(); #endif time = timer.getRealTime(); iterations++; diff --git a/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h b/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h index cfb271d7d7c4090cb6fcfcb67766d0115757d1d9..8f6d376fe27ebed3cd67307bf8f24ea2c5d630d4 100644 --- a/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h +++ b/src/Benchmarks/SpMV/tnlCusparseCSRMatrix.h @@ -60,7 +60,7 @@ class CusparseCSRBase void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA cusparseDcsrmv( *( this->cusparseHandle ), CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -103,7 +103,7 @@ class CusparseCSR< double > : public CusparseCSRBase< double > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, "" ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA double d = 1.0; double* alpha = &d; @@ -134,7 +134,7 @@ class CusparseCSR< float > : public CusparseCSRBase< float > void vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( matrix, "" ); + TNL_ASSERT_TRUE( matrix, "matrix was not initialized" ); #ifdef HAVE_CUDA float d = 1.0; float* alpha = &d; diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 7f8aa040478eec2b3628d7fab6262c415d083ec0..6788d1a68ffd738aef1ca395af9d88b3082b98bc 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -51,6 +51,8 @@ void export_Matrix( py::module & m, const char* name ) using VectorType = TNL::Containers::Vector< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >; + void 
(Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVector&) const = &Matrix::getCompressedRowLengths; + auto matrix = py::class_< Matrix, TNL::Object >( m, name ) .def(py::init<>()) // overloads (defined in Object) @@ -69,7 +71,7 @@ void export_Matrix( py::module & m, const char* name ) .def("setDimensions", &Matrix::setDimensions) .def("setCompressedRowLengths", &Matrix::setCompressedRowLengths) .def("getRowLength", &Matrix::getRowLength) - .def("getCompressedRowLengths", &Matrix::getCompressedRowLengths) + .def("getCompressedRowLengths", _getCompressedRowLengths) // TODO: export for more types .def("setLike", &Matrix::template setLike< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >) .def("getNumberOfMatrixElements", &Matrix::getNumberOfMatrixElements) diff --git a/src/TNL/CMakeLists.txt b/src/TNL/CMakeLists.txt index cd07ae65910ab69bf004f69bb84231caf78aaab3..306bd82a3c5c6633c893beafa90aa768f0e83bee 100644 --- a/src/TNL/CMakeLists.txt +++ b/src/TNL/CMakeLists.txt @@ -14,8 +14,6 @@ ADD_SUBDIRECTORY( Pointers ) ADD_SUBDIRECTORY( Problems ) ADD_SUBDIRECTORY( Solvers ) -ADD_SUBDIRECTORY( legacy ) - SET( CURRENT_DIR ${CMAKE_SOURCE_DIR}/src/TNL ) set( headers @@ -57,8 +55,6 @@ set( tnl_SOURCES ${tnl_config_SOURCES} ${tnl_pointers_SOURCES} ${tnl_solvers_SOURCES} - ${tnl_legacy_SOURCES} - ${common_SOURCES} ) set( tnl_CUDA__SOURCES ${tnl_config_CUDA__SOURCES} @@ -73,7 +69,6 @@ set( tnl_CUDA__SOURCES ${tnl_config_CUDA__SOURCES} ${tnl_problems_CUDA__SOURCES} ${tnl_solvers_CUDA__SOURCES} - ${tnl_legacy_CUDA__SOURCES} ${common_SOURCES} ) diff --git a/src/TNL/Communicators/MpiCommunicator.h b/src/TNL/Communicators/MpiCommunicator.h index c233004a602f31ce8b7220b9983c9541f47f6331..1ad8a6e088445fe76a0aeab3bc0bf68cd00c6943 100644 --- a/src/TNL/Communicators/MpiCommunicator.h +++ b/src/TNL/Communicators/MpiCommunicator.h @@ -314,12 +314,12 @@ class MpiCommunicator } template< typename T > - static void Bcast( T& data, int 
count, int root,CommunicationGroup group) + static void Bcast( T* data, int count, int root, CommunicationGroup group) { #ifdef HAVE_MPI TNL_ASSERT_TRUE(IsInitialized(), "Fatal Error - MPI communicator is not initialized"); TNL_ASSERT_NE(group, NullGroup, "BCast cannot be called with NullGroup"); - MPI_Bcast((void*) &data, count, MPIDataType(data), root, group); + MPI_Bcast((void*) data, count, MPIDataType(data), root, group); #else throw Exceptions::MPISupportMissing(); #endif @@ -340,6 +340,21 @@ class MpiCommunicator #endif } + // in-place variant of Allreduce + template< typename T > + static void Allreduce( T* data, + int count, + const MPI_Op &op, + CommunicationGroup group) + { +#ifdef HAVE_MPI + TNL_ASSERT_NE(group, NullGroup, "Allreduce cannot be called with NullGroup"); + MPI_Allreduce( MPI_IN_PLACE, (void*) data,count,MPIDataType(data),op,group); +#else + throw Exceptions::MPISupportMissing(); +#endif + } + template< typename T > static void Reduce( const T* data, diff --git a/src/TNL/Communicators/NoDistrCommunicator.h b/src/TNL/Communicators/NoDistrCommunicator.h index aac58b916bf17656e9d6c33bead7a4d37441fca7..33bbe01a0d289a74d74af23195ee4d7a60c87366 100644 --- a/src/TNL/Communicators/NoDistrCommunicator.h +++ b/src/TNL/Communicators/NoDistrCommunicator.h @@ -93,8 +93,8 @@ class NoDistrCommunicator { } - template< typename T > - static void Bcast( T& data, int count, int root, CommunicationGroup group) + template< typename T > + static void Bcast( T* data, int count, int root, CommunicationGroup group) { } @@ -108,6 +108,15 @@ class NoDistrCommunicator memcpy( ( void* ) reduced_data, ( const void* ) data, count * sizeof( T ) ); } + // in-place variant of Allreduce + template< typename T > + static void Allreduce( T* data, + int count, + const MPI_Op &op, + CommunicationGroup group ) + { + } + template< typename T > static void Reduce( T* data, T* reduced_data, diff --git a/src/TNL/Containers/Algorithms/ArrayOperations.h 
b/src/TNL/Containers/Algorithms/ArrayOperations.h index ad852e10f78e5b4e08d7cf66abf7071f33e5e73e..47050d32fd8f037251e3fe5258c98fe4d5f90b2c 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperations.h +++ b/src/TNL/Containers/Algorithms/ArrayOperations.h @@ -42,14 +42,14 @@ class ArrayOperations< Devices::Host > static Element getMemoryElement( const Element* data ); template< typename Element, typename Index > - static bool setMemory( Element* data, + static void setMemory( Element* data, const Element& value, const Index size ); template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -93,14 +93,14 @@ class ArrayOperations< Devices::Cuda > static Element getMemoryElement( const Element* data ); template< typename Element, typename Index > - static bool setMemory( Element* data, + static void setMemory( Element* data, const Element& value, const Index size ); template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -132,7 +132,7 @@ class ArrayOperations< Devices::Cuda, Devices::Host > template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -152,7 +152,7 @@ class ArrayOperations< Devices::Host, Devices::Cuda > template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -185,14 +185,14 @@ class ArrayOperations< 
Devices::MIC > static Element getMemoryElement( const Element* data ); template< typename Element, typename Index > - static bool setMemory( Element* data, + static void setMemory( Element* data, const Element& value, const Index size ); template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -224,7 +224,7 @@ class ArrayOperations< Devices::MIC, Devices::Host > template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); @@ -244,7 +244,7 @@ class ArrayOperations< Devices::Host, Devices::MIC > template< typename DestinationElement, typename SourceElement, typename Index > - static bool copyMemory( DestinationElement* destination, + static void copyMemory( DestinationElement* destination, const SourceElement* source, const Index size ); diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h index bca6bdb0479eb38a329234f92305421873864dc5..9a87b52874f6f34f6f788362623060804052399b 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h @@ -8,9 +8,10 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once #include <iostream> +#include <memory> #include <TNL/tnlConfig.h> #include <TNL/Math.h> @@ -21,7 +22,7 @@ #include <TNL/Containers/Algorithms/ReductionOperations.h> namespace TNL { -namespace Containers { +namespace Containers { namespace Algorithms { template< typename Element, typename Index > @@ -99,7 +100,7 @@ setArrayValueCudaKernel( Element* data, #endif template< typename Element, typename Index > -bool +void 
ArrayOperations< Devices::Cuda >:: setMemory( Element* data, const Element& value, @@ -112,7 +113,7 @@ setMemory( Element* data, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); setArrayValueCudaKernel<<< gridSize, blockSize >>>( data, size, value ); - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); #endif @@ -140,7 +141,7 @@ copyMemoryCudaToCudaKernel( DestinationElement* destination, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::Cuda >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -155,7 +156,7 @@ copyMemory( DestinationElement* destination, source, size * sizeof( DestinationElement ), cudaMemcpyDeviceToDevice ); - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; } else { @@ -164,7 +165,7 @@ copyMemory( DestinationElement* destination, Index blocksNumber = ceil( ( double ) size / ( double ) blockSize. x ); gridSize. x = min( blocksNumber, Devices::Cuda::getMaxGridSize() ); copyMemoryCudaToCudaKernel<<< gridSize, blockSize >>>( destination, source, size ); - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; } #else throw Exceptions::CudaSupportMissing(); @@ -182,11 +183,8 @@ compareMemory( const Element1* destination, { TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); - //TODO: The parallel reduction on the CUDA device with different element types is needed. 
- bool result = false; Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities; - Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source, result ); - return result; + return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source ); } template< typename Element, @@ -200,11 +198,9 @@ containsValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; - bool result = false; Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue; reductionContainsValue.setValue( value ); - Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, 0, result ); - return result; + return Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, nullptr ); } template< typename Element, @@ -218,11 +214,9 @@ containsOnlyValue( const Element* data, TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." ); TNL_ASSERT_GE( size, 0, "" ); if( size == 0 ) return false; - bool result = false; Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue; reductionContainsOnlyValue.setValue( value ); - Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, 0, result ); - return result; + return Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, nullptr ); } @@ -232,7 +226,7 @@ containsOnlyValue( const Element* data, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::Host, Devices::Cuda >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -248,23 +242,20 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) std::cerr << "Transfer of data from CUDA device to host failed." 
<< std::endl; - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; } else { - SourceElement* buffer = new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ]; + std::unique_ptr< SourceElement[] > buffer{ new SourceElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); while( i < size ) { - if( cudaMemcpy( buffer, - &source[ i ], + if( cudaMemcpy( (void*) buffer.get(), + (void*) &source[ i ], min( size - i, Devices::Cuda::getGPUTransferBufferSize() ) * sizeof( SourceElement ), cudaMemcpyDeviceToHost ) != cudaSuccess ) - { - delete[] buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - return TNL_CHECK_CUDA_DEVICE; - } + TNL_CHECK_CUDA_DEVICE; Index j( 0 ); while( j < Devices::Cuda::getGPUTransferBufferSize() && i + j < size ) { @@ -273,9 +264,7 @@ copyMemory( DestinationElement* destination, } i += j; } - delete[] buffer; } - return true; #else throw Exceptions::CudaSupportMissing(); #endif @@ -298,28 +287,21 @@ compareMemory( const Element1* destination, TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." ); TNL_ASSERT_GE( size, 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA - Element2* host_buffer = new Element2[ Devices::Cuda::getGPUTransferBufferSize() ]; + std::unique_ptr< Element2[] > host_buffer{ new Element2[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index compared( 0 ); while( compared < size ) { Index transfer = min( size - compared, Devices::Cuda::getGPUTransferBufferSize() ); - if( cudaMemcpy( ( void* ) host_buffer, - ( void* ) & ( source[ compared ] ), + if( cudaMemcpy( (void*) host_buffer.get(), + (void*) &source[ compared ], transfer * sizeof( Element2 ), cudaMemcpyDeviceToHost ) != cudaSuccess ) - { - delete[] host_buffer; std::cerr << "Transfer of data from CUDA device to host failed." << std::endl; - return TNL_CHECK_CUDA_DEVICE; - } - if( ! 
ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer, transfer ) ) - { - delete[] host_buffer; + TNL_CHECK_CUDA_DEVICE; + if( ! ArrayOperations< Devices::Host >::compareMemory( &destination[ compared ], host_buffer.get(), transfer ) ) return false; - } compared += transfer; } - delete[] host_buffer; return true; #else throw Exceptions::CudaSupportMissing(); @@ -332,7 +314,7 @@ compareMemory( const Element1* destination, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::Cuda, Devices::Host >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -349,11 +331,11 @@ copyMemory( DestinationElement* destination, size * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) std::cerr << "Transfer of data from host to CUDA device failed." << std::endl; - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; } else { - DestinationElement* buffer = new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ]; + std::unique_ptr< DestinationElement[] > buffer{ new DestinationElement[ Devices::Cuda::getGPUTransferBufferSize() ] }; Index i( 0 ); while( i < size ) { @@ -363,19 +345,14 @@ copyMemory( DestinationElement* destination, buffer[ j ] = source[ i + j ]; j++; } - if( cudaMemcpy( &destination[ i ], - buffer, + if( cudaMemcpy( (void*) &destination[ i ], + (void*) buffer.get(), j * sizeof( DestinationElement ), cudaMemcpyHostToDevice ) != cudaSuccess ) - { - delete[] buffer; std::cerr << "Transfer of data from host to CUDA device failed." 
<< std::endl; - return TNL_CHECK_CUDA_DEVICE; - } + TNL_CHECK_CUDA_DEVICE; i += j; } - delete[] buffer; - return true; } #else throw Exceptions::CudaSupportMissing(); @@ -397,235 +374,6 @@ compareMemory( const Element1* hostData, return ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory( deviceData, hostData, size ); } -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION - -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< char, int >( char*& data, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< int, int >( int*& data, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int, int >( long int*& data, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< float, int >( float*& data, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< double, int >( double*& data, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, int >( long double*& data, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< char, long int >( char*& data, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< int, long int >( int*& data, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long int, long int >( long int*& data, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< float, long int >( float*& data, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< double, long int >( double*& data, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::allocateMemory< long double, long 
int >( long double*& data, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< char >( char* data ); -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< int >( int* data ); -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< long int >( long int* data ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< float >( float* data ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< double >( double* data ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::freeMemory< long double >( long double* data ); -#endif - -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< char >( char* data, const char& value ); -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< int >( int* data, const int& value ); -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< long int >( long int* data, const long int& value ); -#ifdef INSTANTIATE_FLOAT -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< float >( float* data, const float& value ); -#endif -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< double >( double* data, const double& value ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template void ArrayOperations< Devices::Cuda >::setMemoryElement< long double >( long double* data, const long double& value ); -#endif - -extern template char ArrayOperations< Devices::Cuda >::getMemoryElement< char >( const char* data ); -extern template int ArrayOperations< Devices::Cuda >::getMemoryElement< int >( const int* data ); -extern template long int ArrayOperations< Devices::Cuda >::getMemoryElement< long int >( const long int* data ); -#ifdef INSTANTIATE_FLOAT -extern template float ArrayOperations< Devices::Cuda >::getMemoryElement< float >( const float* data ); -#endif -extern template double 
ArrayOperations< Devices::Cuda >::getMemoryElement< double >( const double* data ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template long double ArrayOperations< Devices::Cuda >::getMemoryElement< long double >( const long double* data ); -#endif - -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< char, char, int >( char* destination, const char* source, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< int, int, int >( int* destination, const int* source, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long int, long int, int >( long int* destination, const long int* source, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< float, float, int >( float* destination, const float* source, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< double, double, int >( double* destination, const double* source, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< char, char, long int >( char* destination, const char* source, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< int, int, long int >( int* destination, const int* source, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long int, long int, long int >( long int* destination, const long int* source, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< float, float, long int >( float* destination, const float* source, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda 
>::copyMemory< double, double, long int >( double* destination, const double* source, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char, char, int >( char* destination, const char* source, const int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int, int, int >( int* destination, const int* source, const int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int, long int, int >( long int* destination, const long int* source, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float, float, int >( float* destination, const float* source, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double, double, int >( double* destination, const double* source, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< char, char, long int >( char* destination, const char* source, const long int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< int, int, long int >( int* destination, const int* source, const long int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long int, long int, long int >( long int* destination, const long int* source, const long int size ); -#ifdef INSTANTIATE_FLOAT 
-extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< float, float, long int >( float* destination, const float* source, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< double, double, long int >( double* destination, const double* source, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char, char, int >( char* destination, const char* source, const int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int, int, int >( int* destination, const int* source, const int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int, long int, int >( long int* destination, const long int* source, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float, float, int >( float* destination, const float* source, const int size ); -#endif -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double, double, int >( double* destination, const double* source, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< char, char, long int >( char* destination, const char* source, const long int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< int, int, long int >( int* 
destination, const int* source, const long int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long int, long int, long int >( long int* destination, const long int* source, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< float, float, long int >( float* destination, const float* source, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< double, double, long int >( double* destination, const double* source, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< char, char, int >( const char* data1, const char* data2, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< int, int, int >( const int* data1, const int* data2, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long int, long int, int >( const long int* data1, const long int* data2, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< float, float, int >( const float* data1, const float* data2, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< double, double, int >( const double* data1, const double* data2, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< char, char, long int >( 
const char* data1, const char* data2, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< int, int, long int >( const int* data1, const int* data2, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long int, long int, long int >( const long int* data1, const long int* data2, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< float, float, long int >( const float* data1, const float* data2, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< double, double, long int >( const double* data1, const double* data2, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char, char, int >( const char* data1, const char* data2, const int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int, int, int >( const int* data1, const int* data2, const int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int, long int, int >( const long int* data1, const long int* data2, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float, float, int >( const float* data1, const float* data2, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double, double, int >( const double* data1, const double* data2, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, int >( const 
long double* data1, const long double* data2, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< char, char, long int >( const char* data1, const char* data2, const long int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< int, int, long int >( const int* data1, const int* data2, const long int size ); -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long int, long int, long int >( const long int* data1, const long int* data2, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< float, float, long int >( const float* data1, const float* data2, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< double, double, long int >( const double* data1, const double* data2, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda, Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char, char, int >( const char* data1, const char* data2, const int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, int, int >( const int* data1, const int* data2, const int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int, long int, int >( const long int* data1, const long int* data2, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float, float, int >( const float* data1, const float* data2, const int size ); -#endif -extern template bool ArrayOperations< 
Devices::Host, Devices::Cuda >::compareMemory< double, double, int >( const double* data1, const double* data2, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< char, char, long int >( const char* data1, const char* data2, const long int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< int, int, long int >( const int* data1, const int* data2, const long int size ); -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long int, long int, long int >( const long int* data1, const long int* data2, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< float, float, long int >( const float* data1, const float* data2, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< double, double, long int >( const double* data1, const double* data2, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host, Devices::Cuda >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Cuda >::setMemory< char, int >( char* destination, const char& value, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::setMemory< int, int >( int* destination, const int& value, const int size ); -extern template bool ArrayOperations< Devices::Cuda >::setMemory< long int, int >( long int* destination, const long int& value, const int size ); -#ifdef INSTANTIATE_FLOAT 
-extern template bool ArrayOperations< Devices::Cuda >::setMemory< float, int >( float* destination, const float& value, const int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::setMemory< double, int >( double* destination, const double& value, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::setMemory< long double, int >( long double* destination, const long double& value, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Cuda >::setMemory< char, long int >( char* destination, const char& value, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::setMemory< int, long int >( int* destination, const int& value, const long int size ); -extern template bool ArrayOperations< Devices::Cuda >::setMemory< long int, long int >( long int* destination, const long int& value, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Cuda >::setMemory< float, long int >( float* destination, const float& value, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Cuda >::setMemory< double, long int >( double* destination, const double& value, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Cuda >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size ); -#endif -#endif - -#endif - } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h index 756731ca9fe645948aa7c9f8e1634e16ebba9a17..c48d4b40bb3fc8997fe0e8adbf5c3fc1a64fb8a1 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h +++ b/src/TNL/Containers/Algorithms/ArrayOperationsHost_impl.h @@ -62,7 +62,7 @@ getMemoryElement( const Element* 
data ) } template< typename Element, typename Index > -bool +void ArrayOperations< Devices::Host >:: setMemory( Element* data, const Element& value, @@ -70,13 +70,12 @@ setMemory( Element* data, { for( Index i = 0; i < size; i ++ ) data[ i ] = value; - return true; } template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::Host >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -100,7 +99,6 @@ copyMemory( DestinationElement* destination, else for( Index i = 0; i < size; i ++ ) destination[ i ] = ( DestinationElement ) source[ i ]; - return true; } template< typename DestinationElement, @@ -164,140 +162,6 @@ containsOnlyValue( const Element* data, return true; } - -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION - -extern template bool ArrayOperations< Devices::Host >::allocateMemory< char, int >( char*& data, const int size ); -extern template bool ArrayOperations< Devices::Host >::allocateMemory< int, int >( int*& data, const int size ); -extern template bool ArrayOperations< Devices::Host >::allocateMemory< long int, int >( long int*& data, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::allocateMemory< float, int >( float*& data, const int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::allocateMemory< double, int >( double*& data, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::allocateMemory< long double, int >( long double*& data, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host >::allocateMemory< char, long int >( char*& data, const long int size ); -extern template bool ArrayOperations< Devices::Host >::allocateMemory< int, long int >( int*& data, const long int size ); -extern template bool ArrayOperations< Devices::Host >::allocateMemory< long int, long int >( long int*& data, const 
long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::allocateMemory< float, long int >( float*& data, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::allocateMemory< double, long int >( double*& data, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::allocateMemory< long double, long int >( long double*& data, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Host >::freeMemory< char >( char* data ); -extern template bool ArrayOperations< Devices::Host >::freeMemory< int >( int* data ); -extern template bool ArrayOperations< Devices::Host >::freeMemory< long int >( long int* data ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::freeMemory< float >( float* data ); -#endif -extern template bool ArrayOperations< Devices::Host >::freeMemory< double >( double* data ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::freeMemory< long double >( long double* data ); -#endif - -extern template void ArrayOperations< Devices::Host >::setMemoryElement< char >( char* data, const char& value ); -extern template void ArrayOperations< Devices::Host >::setMemoryElement< int >( int* data, const int& value ); -extern template void ArrayOperations< Devices::Host >::setMemoryElement< long int >( long int* data, const long int& value ); -#ifdef INSTANTIATE_FLOAT -extern template void ArrayOperations< Devices::Host >::setMemoryElement< float >( float* data, const float& value ); -#endif -extern template void ArrayOperations< Devices::Host >::setMemoryElement< double >( double* data, const double& value ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template void ArrayOperations< Devices::Host >::setMemoryElement< long double >( long double* data, const long double& value ); -#endif - -extern template char ArrayOperations< Devices::Host 
>::getMemoryElement< char >( char* data ); -extern template int ArrayOperations< Devices::Host >::getMemoryElement< int >( int* data ); -extern template long int ArrayOperations< Devices::Host >::getMemoryElement< long int >( long int* data ); -#ifdef INSTANTIATE_FLOAT -extern template float ArrayOperations< Devices::Host >::getMemoryElement< float >( float* data ); -#endif -extern template double ArrayOperations< Devices::Host >::getMemoryElement< double >( double* data ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template long double ArrayOperations< Devices::Host >::getMemoryElement< long double >( long double* data ); -#endif - -extern template bool ArrayOperations< Devices::Host >::copyMemory< char, char, int >( char* destination, const char* source, const int size ); -extern template bool ArrayOperations< Devices::Host >::copyMemory< int, int, int >( int* destination, const int* source, const int size ); -extern template bool ArrayOperations< Devices::Host >::copyMemory< long int, long int, int >( long int* destination, const long int* source, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::copyMemory< float, float, int >( float* destination, const float* source, const int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::copyMemory< double, double, int >( double* destination, const double* source, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, int >( long double* destination, const long double* source, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host >::copyMemory< char, char, long int >( char* destination, const char* source, const long int size ); -extern template bool ArrayOperations< Devices::Host >::copyMemory< int, int, long int >( int* destination, const int* source, const long int size ); -extern template bool 
ArrayOperations< Devices::Host >::copyMemory< long int, long int, long int >( long int* destination, const long int* source, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::copyMemory< float, float, long int >( float* destination, const float* source, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::copyMemory< double, double, long int >( double* destination, const double* source, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::copyMemory< long double, long double, long int >( long double* destination, const long double* source, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Host >::compareMemory< char, char, int >( const char* data1, const char* data2, const int size ); -extern template bool ArrayOperations< Devices::Host >::compareMemory< int, int, int >( const int* data1, const int* data2, const int size ); -extern template bool ArrayOperations< Devices::Host >::compareMemory< long int, long int, int >( const long int* data1, const long int* data2, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::compareMemory< float, float, int >( const float* data1, const float* data2, const int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::compareMemory< double, double, int >( const double* data1, const double* data2, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, int >( const long double* data1, const long double* data2, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host >::compareMemory< char, char, long int >( const char* data1, const char* data2, const long int size ); -extern template bool ArrayOperations< Devices::Host >::compareMemory< int, 
int, long int >( const int* data1, const int* data2, const long int size ); -extern template bool ArrayOperations< Devices::Host >::compareMemory< long int, long int, long int >( const long int* data1, const long int* data2, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::compareMemory< float, float, long int >( const float* data1, const float* data2, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::compareMemory< double, double, long int >( const double* data1, const double* data2, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::compareMemory< long double, long double, long int >( const long double* data1, const long double* data2, const long int size ); -#endif -#endif - -extern template bool ArrayOperations< Devices::Host >::setMemory< char, int >( char* destination, const char& value, const int size ); -extern template bool ArrayOperations< Devices::Host >::setMemory< int, int >( int* destination, const int& value, const int size ); -extern template bool ArrayOperations< Devices::Host >::setMemory< long int, int >( long int* destination, const long int& value, const int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::setMemory< float, int >( float* destination, const float& value, const int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::setMemory< double, int >( double* destination, const double& value, const int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::setMemory< long double, int >( long double* destination, const long double& value, const int size ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool ArrayOperations< Devices::Host >::setMemory< char, long int >( char* destination, const char& value, const long int size ); -extern template bool ArrayOperations< Devices::Host 
>::setMemory< int, long int >( int* destination, const int& value, const long int size ); -extern template bool ArrayOperations< Devices::Host >::setMemory< long int, long int >( long int* destination, const long int& value, const long int size ); -#ifdef INSTANTIATE_FLOAT -extern template bool ArrayOperations< Devices::Host >::setMemory< float, long int >( float* destination, const float& value, const long int size ); -#endif -extern template bool ArrayOperations< Devices::Host >::setMemory< double, long int >( double* destination, const double& value, const long int size ); -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool ArrayOperations< Devices::Host >::setMemory< long double, long int >( long double* destination, const long double& value, const long int size ); -#endif -#endif - -#endif - } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h b/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h index 6a7b873531686ed0da24274ae97321a725d27f18..0289c3c6c85d482340d74a6f100ad26695294a45 100644 --- a/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h +++ b/src/TNL/Containers/Algorithms/ArrayOperationsMIC_impl.h @@ -78,7 +78,7 @@ getMemoryElement( const Element* data ) } template< typename Element, typename Index > -bool +void ArrayOperations< Devices::MIC >:: setMemory( Element* data, const Element& value, @@ -95,7 +95,6 @@ setMemory( Element* data, for(int i=0;i<size;i++) dst[i]=tmp; } - return true; #else throw Exceptions::MICSupportMissing(); #endif @@ -104,7 +103,7 @@ setMemory( Element* data, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::MIC >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -123,7 +122,6 @@ copyMemory( DestinationElement* destination, { memcpy(dst_ptr.pointer,src_ptr.pointer,size*sizeof(DestinationElement)); } - return true; } else { @@ -136,13 
+134,10 @@ copyMemory( DestinationElement* destination, for(int i=0;i<size;i++) dst_ptr.pointer[i]=src_ptr.pointer[i]; } - return true; - } #else throw Exceptions::MICSupportMissing(); #endif - return false; } template< typename Element1, @@ -242,7 +237,7 @@ containsOnlyValue( const Element* data, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::Host, Devices::MIC >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -267,7 +262,6 @@ copyMemory( DestinationElement* destination, } memcpy((void*)destination,(void*)&tmp,size*sizeof(SourceElement)); - return true; } else { @@ -277,7 +271,6 @@ copyMemory( DestinationElement* destination, { memcpy((void*)tmp,src_ptr.pointer,size*sizeof(SourceElement)); } - return true; } } else @@ -297,7 +290,6 @@ copyMemory( DestinationElement* destination, } memcpy((void*)destination,(void*)&tmp,size*sizeof(DestinationElement)); - return true; } else { @@ -309,10 +301,8 @@ copyMemory( DestinationElement* destination, for(int i=0;i<size;i++) dst[i]=src_ptr.pointer[i]; } - return true; } } - return false; #else throw Exceptions::MICSupportMissing(); #endif @@ -368,7 +358,7 @@ compareMemory( const Element1* destination, template< typename DestinationElement, typename SourceElement, typename Index > -bool +void ArrayOperations< Devices::MIC, Devices::Host >:: copyMemory( DestinationElement* destination, const SourceElement* source, @@ -393,8 +383,6 @@ copyMemory( DestinationElement* destination, { memcpy(dst_ptr.pointer,(void*)&tmp,size*sizeof(SourceElement)); } - - return true; } else { @@ -404,7 +392,6 @@ copyMemory( DestinationElement* destination, { memcpy(dst_ptr.pointer,(void*)tmp,size*sizeof(SourceElement)); } - return true; } } else @@ -423,7 +410,6 @@ copyMemory( DestinationElement* destination, for(int i=0;i<size;i++) dst_ptr.pointer[i]=src[i]; } - return true; } else { @@ -435,10 +421,8 @@ copyMemory( DestinationElement* destination, 
for(int i=0;i<size;i++) dst_ptr.pointer[i]=src[i]; } - return true; } } - return false; #else throw Exceptions::MICSupportMissing(); #endif diff --git a/src/TNL/Containers/Algorithms/Multireduction.h b/src/TNL/Containers/Algorithms/Multireduction.h index 78e408cc1b079c72ad30b6158c82b190a4a929f4..42b8bf28d16842f9d28a624f8995f2ffe959d943 100644 --- a/src/TNL/Containers/Algorithms/Multireduction.h +++ b/src/TNL/Containers/Algorithms/Multireduction.h @@ -18,7 +18,7 @@ namespace TNL { namespace Containers { -namespace Algorithms { +namespace Algorithms { template< typename Device > class Multireduction @@ -30,7 +30,7 @@ class Multireduction< Devices::Cuda > { public: template< typename Operation, typename Index > - static bool + static void reduce( Operation& operation, const int n, const Index size, @@ -45,7 +45,7 @@ class Multireduction< Devices::Host > { public: template< typename Operation, typename Index > - static bool + static void reduce( Operation& operation, const int n, const Index size, @@ -60,7 +60,7 @@ class Multireduction< Devices::MIC > { public: template< typename Operation, typename Index > - static bool + static void reduce( Operation& operation, const int n, const Index size, diff --git a/src/TNL/Containers/Algorithms/Multireduction_impl.h b/src/TNL/Containers/Algorithms/Multireduction_impl.h index 92c1f9b19c4a10a1aa967f97930df0550c058894..505c2be0e4d55b5e2b914a6d91e4d0739f28819a 100644 --- a/src/TNL/Containers/Algorithms/Multireduction_impl.h +++ b/src/TNL/Containers/Algorithms/Multireduction_impl.h @@ -49,7 +49,7 @@ static constexpr int Multireduction_minGpuDataSize = 256;//65536; //16384;//1024 * hostResult: output array of size = n */ template< typename Operation, typename Index > -bool +void Multireduction< Devices::Cuda >:: reduce( Operation& operation, const int n, @@ -75,18 +75,17 @@ reduce( Operation& operation, */ if( n * ldInput1 < Multireduction_minGpuDataSize ) { DataType1 hostArray1[ Multireduction_minGpuDataSize ]; - if( ! 
ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, n * ldInput1 ) ) - return false; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, n * ldInput1 ); if( deviceInput2 ) { using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type; _DT2 hostArray2[ Multireduction_minGpuDataSize ]; - if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ) ) - return false; - return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult ); + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ); + Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult ); } else { - return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult ); + Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult ); } + return; } #ifdef CUDA_REDUCTION_PROFILING @@ -117,8 +116,7 @@ reduce( Operation& operation, * Transfer the reduced data from device to host. */ ResultType resultArray[ n * reducedSize ]; - if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, n * reducedSize ) ) - return false; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, n * reducedSize ); #ifdef CUDA_REDUCTION_PROFILING timer.stop(); @@ -146,7 +144,7 @@ reduce( Operation& operation, std::cout << " Multireduction of small data set on CPU took " << timer.getRealTime() << " sec. 
" << std::endl; #endif - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); #endif @@ -163,7 +161,7 @@ reduce( Operation& operation, * hostResult: output array of size = n */ template< typename Operation, typename Index > -bool +void Multireduction< Devices::Host >:: reduce( Operation& operation, const int n, @@ -249,12 +247,10 @@ reduce( Operation& operation, #ifdef HAVE_OPENMP } #endif - - return true; } template< typename Operation, typename Index > -bool +void Multireduction< Devices::MIC >:: reduce( Operation& operation, const int n, @@ -267,11 +263,9 @@ reduce( Operation& operation, TNL_ASSERT( n > 0, ); TNL_ASSERT( size <= ldInput1, ); - std::cout << "Not Implemented yet Multireduction< Devices::MIC >::reduce" << std::endl; - return true; + throw std::runtime_error("Not Implemented yet Multireduction< Devices::MIC >::reduce"); } - } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/Reduction.h b/src/TNL/Containers/Algorithms/Reduction.h index e8e544db2091b86603ae02cb6fd86de2771a3fa7..d4f45f30e435590629475d107254822485b33979 100644 --- a/src/TNL/Containers/Algorithms/Reduction.h +++ b/src/TNL/Containers/Algorithms/Reduction.h @@ -30,12 +30,11 @@ class Reduction< Devices::Cuda > { public: template< typename Operation, typename Index > - static bool + static typename Operation::ResultType reduce( Operation& operation, const Index size, const typename Operation::DataType1* deviceInput1, - const typename Operation::DataType2* deviceInput2, - typename Operation::ResultType& result ); + const typename Operation::DataType2* deviceInput2 ); }; template<> @@ -43,12 +42,11 @@ class Reduction< Devices::Host > { public: template< typename Operation, typename Index > - static bool + static typename Operation::ResultType reduce( Operation& operation, const Index size, const typename Operation::DataType1* deviceInput1, - const typename Operation::DataType2* 
deviceInput2, - typename Operation::ResultType& result ); + const typename Operation::DataType2* deviceInput2 ); }; template<> @@ -56,12 +54,11 @@ class Reduction< Devices::MIC > { public: template< typename Operation, typename Index > - static bool + static typename Operation::ResultType reduce( Operation& operation, const Index size, const typename Operation::DataType1* deviceInput1, - const typename Operation::DataType2* deviceInput2, - typename Operation::ResultType& result ); + const typename Operation::DataType2* deviceInput2 ); }; } // namespace Algorithms diff --git a/src/TNL/Containers/Algorithms/ReductionOperations.h b/src/TNL/Containers/Algorithms/ReductionOperations.h index c6be17ed9ba270bf2beaab883ff21473de8bb9a9..33ef84b1c7f2fb0f9cb5ea4b72fdb9776fcce08c 100644 --- a/src/TNL/Containers/Algorithms/ReductionOperations.h +++ b/src/TNL/Containers/Algorithms/ReductionOperations.h @@ -462,7 +462,7 @@ public: }; template< typename Data1, typename Data2, typename Result = Data1 > -class ParallelReductionDiffAbsSum : public ParallelReductionMax< Result, Result > +class ParallelReductionDiffAbsSum : public ParallelReductionSum< Result, Result > { public: using DataType1 = Data1; diff --git a/src/TNL/Containers/Algorithms/Reduction_impl.h b/src/TNL/Containers/Algorithms/Reduction_impl.h index 9ebfce43f487759df2e55c7fe3224f2a9e95aa30..ee4e8d9792b9832a259d3ac07a571839837eac14 100644 --- a/src/TNL/Containers/Algorithms/Reduction_impl.h +++ b/src/TNL/Containers/Algorithms/Reduction_impl.h @@ -10,7 +10,7 @@ // Implemented by: Tomas Oberhuber, Jakub Klinkovsky -#pragma once +#pragma once #include "Reduction.h" @@ -39,13 +39,12 @@ namespace Algorithms { static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256; template< typename Operation, typename Index > -bool +typename Operation::ResultType Reduction< Devices::Cuda >:: reduce( Operation& operation, const Index size, const typename Operation::DataType1* deviceInput1, - const typename 
Operation::DataType2* deviceInput2, - typename Operation::ResultType& result ) + const typename Operation::DataType2* deviceInput2 ) { #ifdef HAVE_CUDA @@ -54,7 +53,7 @@ reduce( Operation& operation, typedef typename Operation::DataType2 DataType2; typedef typename Operation::ResultType ResultType; typedef typename Operation::LaterReductionOperation LaterReductionOperation; - + /*** * Only fundamental and pointer types can be safely reduced on host. Complex * objects stored on the device might contain pointers into the device memory, @@ -70,17 +69,15 @@ reduce( Operation& operation, if( can_reduce_all_on_host && size <= Reduction_minGpuDataSize ) { typename std::remove_const< DataType1 >::type hostArray1[ Reduction_minGpuDataSize ]; - if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, size ) ) - return false; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray1, deviceInput1, size ); if( deviceInput2 ) { using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type; typename std::remove_const< _DT2 >::type hostArray2[ Reduction_minGpuDataSize ]; - if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ) ) - return false; - return Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result ); + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size ); + return Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2 ); } else { - return Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr, result ); + return Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr ); } } @@ -111,26 +108,27 @@ reduce( Operation& operation, * Transfer the reduced data from device to host. */ ResultType resultArray[ reducedSize ]; - if( ! 
ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ) ) - return false; - + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); + #ifdef CUDA_REDUCTION_PROFILING timer.stop(); std::cout << " Transferring data to CPU took " << timer.getRealTime() << " sec. " << std::endl; timer.reset(); timer.start(); #endif - + /*** * Reduce the data on the host system. */ LaterReductionOperation laterReductionOperation; - Reduction< Devices::Host >::reduce( laterReductionOperation, reducedSize, resultArray, (void*) nullptr, result ); - + const ResultType result = Reduction< Devices::Host >::reduce( laterReductionOperation, reducedSize, resultArray, (void*) nullptr ); + #ifdef CUDA_REDUCTION_PROFILING timer.stop(); std::cout << " Reduction of small data set on CPU took " << timer.getRealTime() << " sec. " << std::endl; #endif + + return result; } else { /*** @@ -153,30 +151,28 @@ reduce( Operation& operation, #endif ResultType resultArray[ 1 ]; - if( ! ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ) ) - return false; - result = resultArray[ 0 ]; + ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( resultArray, deviceAux1, reducedSize ); + const ResultType result = resultArray[ 0 ]; #ifdef CUDA_REDUCTION_PROFILING timer.stop(); std::cout << " Transferring the result to CPU took " << timer.getRealTime() << " sec. 
" << std::endl; #endif + + return result; } - - return TNL_CHECK_CUDA_DEVICE; #else throw Exceptions::CudaSupportMissing(); #endif }; template< typename Operation, typename Index > -bool +typename Operation::ResultType Reduction< Devices::Host >:: reduce( Operation& operation, const Index size, const typename Operation::DataType1* input1, - const typename Operation::DataType2* input2, - typename Operation::ResultType& result ) + const typename Operation::DataType2* input2 ) { typedef Index IndexType; typedef typename Operation::DataType1 DataType1; @@ -185,1542 +181,58 @@ reduce( Operation& operation, #ifdef HAVE_OPENMP constexpr int block_size = 128; - if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) + if( TNL::Devices::Host::isOMPEnabled() && size >= 2 * block_size ) { + // global result variable + ResultType result = operation.initialValue(); #pragma omp parallel - { - const int blocks = size / block_size; - - // first thread initializes the global result variable - #pragma omp single nowait - { - result = operation.initialValue(); - } - - // initialize thread-local result variable - ResultType r = operation.initialValue(); - - #pragma omp for nowait - for( int b = 0; b < blocks; b++ ) { - const int offset = b * block_size; - for( IndexType i = 0; i < block_size; i++ ) - operation.firstReduction( r, offset + i, input1, input2 ); - } - - // the first thread that reaches here processes the last, incomplete block - #pragma omp single nowait - { - for( IndexType i = blocks * block_size; i < size; i++ ) - operation.firstReduction( r, i, input1, input2 ); - } - - // inter-thread reduction of local results - #pragma omp critical { - operation.commonReduction( result, r ); + const int blocks = size / block_size; + + // initialize array for thread-local results + ResultType r[ 4 ] = { operation.initialValue() }; + + #pragma omp for nowait + for( int b = 0; b < blocks; b++ ) { + const int offset = b * block_size; + for( IndexType i = 0; i < block_size; i 
+= 4 ) { + operation.firstReduction( r[ 0 ], offset + i, input1, input2 ); + operation.firstReduction( r[ 1 ], offset + i + 1, input1, input2 ); + operation.firstReduction( r[ 2 ], offset + i + 2, input1, input2 ); + operation.firstReduction( r[ 3 ], offset + i + 3, input1, input2 ); + } + } + + // the first thread that reaches here processes the last, incomplete block + #pragma omp single nowait + { + for( IndexType i = blocks * block_size; i < size; i++ ) + operation.firstReduction( r[ 0 ], i, input1, input2 ); + } + + // reduction of local results + operation.commonReduction( r[ 0 ], r[ 1 ] ); + operation.commonReduction( r[ 0 ], r[ 2 ] ); + operation.commonReduction( r[ 0 ], r[ 3 ] ); + + // inter-thread reduction of local results + #pragma omp critical + { + operation.commonReduction( result, r[ 0 ] ); + } } + return result; } else { #endif - result = operation.initialValue(); + ResultType result = operation.initialValue(); for( IndexType i = 0; i < size; i++ ) operation.firstReduction( result, i, input1, input2 ); + return result; #ifdef HAVE_OPENMP } #endif - - return true; } - -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION - -/**** - * Sum - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< char, int > > - ( const tnlParallelReductionSum< char, int >& operation, - const typename tnlParallelReductionSum< char, int > :: IndexType size, - const typename tnlParallelReductionSum< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< int, int > > - ( const tnlParallelReductionSum< int, int >& operation, - const typename tnlParallelReductionSum< int, int > :: IndexType size, - const typename tnlParallelReductionSum< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< int, int > :: DataType2* deviceInput2, - 
typename tnlParallelReductionSum< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< float, int > > - ( const tnlParallelReductionSum< float, int >& operation, - const typename tnlParallelReductionSum< float, int > :: IndexType size, - const typename tnlParallelReductionSum< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< double, int > > - ( const tnlParallelReductionSum< double, int>& operation, - const typename tnlParallelReductionSum< double, int > :: IndexType size, - const typename tnlParallelReductionSum< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, int > > - ( const tnlParallelReductionSum< long double, int>& operation, - const typename tnlParallelReductionSum< long double, int > :: IndexType size, - const typename tnlParallelReductionSum< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< char, long int > > - ( const tnlParallelReductionSum< char, long int >& operation, - const typename tnlParallelReductionSum< char, long int > :: IndexType size, - const typename tnlParallelReductionSum< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< char, long int > :: DataType2* deviceInput2, - typename 
tnlParallelReductionSum< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< int, long int > > - ( const tnlParallelReductionSum< int, long int >& operation, - const typename tnlParallelReductionSum< int, long int > :: IndexType size, - const typename tnlParallelReductionSum< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< float, long int > > - ( const tnlParallelReductionSum< float, long int >& operation, - const typename tnlParallelReductionSum< float, long int > :: IndexType size, - const typename tnlParallelReductionSum< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< double, long int > > - ( const tnlParallelReductionSum< double, long int>& operation, - const typename tnlParallelReductionSum< double, long int > :: IndexType size, - const typename tnlParallelReductionSum< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionSum< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionSum< long double, long int > > - ( const tnlParallelReductionSum< long double, long int>& operation, - const typename tnlParallelReductionSum< long double, long int > :: IndexType size, - const typename tnlParallelReductionSum< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionSum< long double, long int > :: DataType2* 
deviceInput2, - typename tnlParallelReductionSum< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Min - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< char, int > > - ( const tnlParallelReductionMin< char, int >& operation, - const typename tnlParallelReductionMin< char, int > :: IndexType size, - const typename tnlParallelReductionMin< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< int, int > > - ( const tnlParallelReductionMin< int, int >& operation, - const typename tnlParallelReductionMin< int, int > :: IndexType size, - const typename tnlParallelReductionMin< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< float, int > > - ( const tnlParallelReductionMin< float, int >& operation, - const typename tnlParallelReductionMin< float, int > :: IndexType size, - const typename tnlParallelReductionMin< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< double, int > > - ( const tnlParallelReductionMin< double, int >& operation, - const typename tnlParallelReductionMin< double, int > :: IndexType size, - const typename tnlParallelReductionMin< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE 
-extern template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, int > > - ( const tnlParallelReductionMin< long double, int>& operation, - const typename tnlParallelReductionMin< long double, int > :: IndexType size, - const typename tnlParallelReductionMin< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< char, long int > > - ( const tnlParallelReductionMin< char, long int >& operation, - const typename tnlParallelReductionMin< char, long int > :: IndexType size, - const typename tnlParallelReductionMin< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< int, long int > > - ( const tnlParallelReductionMin< int, long int >& operation, - const typename tnlParallelReductionMin< int, long int > :: IndexType size, - const typename tnlParallelReductionMin< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< float, long int > > - ( const tnlParallelReductionMin< float, long int >& operation, - const typename tnlParallelReductionMin< float, long int > :: IndexType size, - const typename tnlParallelReductionMin< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< float, long int> :: ResultType& result ); - -extern 
template bool reductionOnCudaDevice< tnlParallelReductionMin< double, long int > > - ( const tnlParallelReductionMin< double, long int>& operation, - const typename tnlParallelReductionMin< double, long int > :: IndexType size, - const typename tnlParallelReductionMin< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionMin< long double, long int > > - ( const tnlParallelReductionMin< long double, long int>& operation, - const typename tnlParallelReductionMin< long double, long int > :: IndexType size, - const typename tnlParallelReductionMin< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMin< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMin< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Max - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< char, int > > - ( const tnlParallelReductionMax< char, int >& operation, - const typename tnlParallelReductionMax< char, int > :: IndexType size, - const typename tnlParallelReductionMax< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< int, int > > - ( const tnlParallelReductionMax< int, int >& operation, - const typename tnlParallelReductionMax< int, int > :: IndexType size, - const typename tnlParallelReductionMax< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< int, int > :: ResultType& result ); - -extern 
template bool reductionOnCudaDevice< tnlParallelReductionMax< float, int > > - ( const tnlParallelReductionMax< float, int >& operation, - const typename tnlParallelReductionMax< float, int > :: IndexType size, - const typename tnlParallelReductionMax< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< double, int > > - ( const tnlParallelReductionMax< double, int>& operation, - const typename tnlParallelReductionMax< double, int > :: IndexType size, - const typename tnlParallelReductionMax< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, int > > - ( const tnlParallelReductionMax< long double, int>& operation, - const typename tnlParallelReductionMax< long double, int > :: IndexType size, - const typename tnlParallelReductionMax< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< char, long int > > - ( const tnlParallelReductionMax< char, long int >& operation, - const typename tnlParallelReductionMax< char, long int > :: IndexType size, - const typename tnlParallelReductionMax< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< char, long int > :: ResultType& result ); - -extern template bool 
reductionOnCudaDevice< tnlParallelReductionMax< int, long int > > - ( const tnlParallelReductionMax< int, long int >& operation, - const typename tnlParallelReductionMax< int, long int > :: IndexType size, - const typename tnlParallelReductionMax< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< float, long int > > - ( const tnlParallelReductionMax< float, long int >& operation, - const typename tnlParallelReductionMax< float, long int > :: IndexType size, - const typename tnlParallelReductionMax< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< double, long int > > - ( const tnlParallelReductionMax< double, long int>& operation, - const typename tnlParallelReductionMax< double, long int > :: IndexType size, - const typename tnlParallelReductionMax< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionMax< long double, long int > > - ( const tnlParallelReductionMax< long double, long int>& operation, - const typename tnlParallelReductionMax< long double, long int > :: IndexType size, - const typename tnlParallelReductionMax< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionMax< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionMax< long double, long int> :: ResultType& result ); 
-#endif -#endif - - -/**** - * Abs sum - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, int > > - ( const tnlParallelReductionAbsSum< char, int >& operation, - const typename tnlParallelReductionAbsSum< char, int > :: IndexType size, - const typename tnlParallelReductionAbsSum< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, int > > - ( const tnlParallelReductionAbsSum< int, int >& operation, - const typename tnlParallelReductionAbsSum< int, int > :: IndexType size, - const typename tnlParallelReductionAbsSum< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, int > > - ( const tnlParallelReductionAbsSum< float, int >& operation, - const typename tnlParallelReductionAbsSum< float, int > :: IndexType size, - const typename tnlParallelReductionAbsSum< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, int > > - ( const tnlParallelReductionAbsSum< double, int>& operation, - const typename tnlParallelReductionAbsSum< double, int > :: IndexType size, - const typename tnlParallelReductionAbsSum< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template 
bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, int > > - ( const tnlParallelReductionAbsSum< long double, int>& operation, - const typename tnlParallelReductionAbsSum< long double, int > :: IndexType size, - const typename tnlParallelReductionAbsSum< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< char, long int > > - ( const tnlParallelReductionAbsSum< char, long int >& operation, - const typename tnlParallelReductionAbsSum< char, long int > :: IndexType size, - const typename tnlParallelReductionAbsSum< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< int, long int > > - ( const tnlParallelReductionAbsSum< int, long int >& operation, - const typename tnlParallelReductionAbsSum< int, long int > :: IndexType size, - const typename tnlParallelReductionAbsSum< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< float, long int > > - ( const tnlParallelReductionAbsSum< float, long int >& operation, - const typename tnlParallelReductionAbsSum< float, long int > :: IndexType size, - const typename tnlParallelReductionAbsSum< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< 
float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< double, long int > > - ( const tnlParallelReductionAbsSum< double, long int>& operation, - const typename tnlParallelReductionAbsSum< double, long int > :: IndexType size, - const typename tnlParallelReductionAbsSum< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsSum< long double, long int > > - ( const tnlParallelReductionAbsSum< long double, long int>& operation, - const typename tnlParallelReductionAbsSum< long double, long int > :: IndexType size, - const typename tnlParallelReductionAbsSum< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsSum< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsSum< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Abs min - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, int > > - ( const tnlParallelReductionAbsMin< char, int >& operation, - const typename tnlParallelReductionAbsMin< char, int > :: IndexType size, - const typename tnlParallelReductionAbsMin< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, int > > - ( const tnlParallelReductionAbsMin< int, int >& operation, - const typename tnlParallelReductionAbsMin< int, int > :: IndexType size, - const typename tnlParallelReductionAbsMin< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< 
int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, int > > - ( const tnlParallelReductionAbsMin< float, int >& operation, - const typename tnlParallelReductionAbsMin< float, int > :: IndexType size, - const typename tnlParallelReductionAbsMin< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, int > > - ( const tnlParallelReductionAbsMin< double, int>& operation, - const typename tnlParallelReductionAbsMin< double, int > :: IndexType size, - const typename tnlParallelReductionAbsMin< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, int > > - ( const tnlParallelReductionAbsMin< long double, int>& operation, - const typename tnlParallelReductionAbsMin< long double, int > :: IndexType size, - const typename tnlParallelReductionAbsMin< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< char, long int > > - ( const tnlParallelReductionAbsMin< char, long int >& operation, - const typename tnlParallelReductionAbsMin< char, long int > :: IndexType size, - const typename tnlParallelReductionAbsMin< char, long int > :: DataType1* deviceInput1, - const 
typename tnlParallelReductionAbsMin< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< int, long int > > - ( const tnlParallelReductionAbsMin< int, long int >& operation, - const typename tnlParallelReductionAbsMin< int, long int > :: IndexType size, - const typename tnlParallelReductionAbsMin< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< float, long int > > - ( const tnlParallelReductionAbsMin< float, long int >& operation, - const typename tnlParallelReductionAbsMin< float, long int > :: IndexType size, - const typename tnlParallelReductionAbsMin< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< double, long int > > - ( const tnlParallelReductionAbsMin< double, long int>& operation, - const typename tnlParallelReductionAbsMin< double, long int > :: IndexType size, - const typename tnlParallelReductionAbsMin< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMin< long double, long int > > - ( const tnlParallelReductionAbsMin< long double, long int>& operation, - const typename tnlParallelReductionAbsMin< long double, long int > :: IndexType size, - const typename 
tnlParallelReductionAbsMin< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMin< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMin< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Abs max - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, int > > - ( const tnlParallelReductionAbsMax< char, int >& operation, - const typename tnlParallelReductionAbsMax< char, int > :: IndexType size, - const typename tnlParallelReductionAbsMax< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, int > > - ( const tnlParallelReductionAbsMax< int, int >& operation, - const typename tnlParallelReductionAbsMax< int, int > :: IndexType size, - const typename tnlParallelReductionAbsMax< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, int > > - ( const tnlParallelReductionAbsMax< float, int >& operation, - const typename tnlParallelReductionAbsMax< float, int > :: IndexType size, - const typename tnlParallelReductionAbsMax< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, int > > - ( const tnlParallelReductionAbsMax< double, int>& operation, - const typename tnlParallelReductionAbsMax< double, int > :: IndexType size, - const typename 
tnlParallelReductionAbsMax< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, int > > - ( const tnlParallelReductionAbsMax< long double, int>& operation, - const typename tnlParallelReductionAbsMax< long double, int > :: IndexType size, - const typename tnlParallelReductionAbsMax< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > - ( const tnlParallelReductionAbsMax< char, long int >& operation, - const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, - const typename tnlParallelReductionAbsMax< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< int, long int > > - ( const tnlParallelReductionAbsMax< int, long int >& operation, - const typename tnlParallelReductionAbsMax< int, long int > :: IndexType size, - const typename tnlParallelReductionAbsMax< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< float, long int > > - ( const tnlParallelReductionAbsMax< float, long int >& operation, - const typename 
tnlParallelReductionAbsMax< float, long int > :: IndexType size, - const typename tnlParallelReductionAbsMax< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< double, long int > > - ( const tnlParallelReductionAbsMax< double, long int>& operation, - const typename tnlParallelReductionAbsMax< double, long int > :: IndexType size, - const typename tnlParallelReductionAbsMax< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, long int > > - ( const tnlParallelReductionAbsMax< long double, long int>& operation, - const typename tnlParallelReductionAbsMax< long double, long int > :: IndexType size, - const typename tnlParallelReductionAbsMax< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionAbsMax< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionAbsMax< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Logical AND - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, int > > - ( const tnlParallelReductionLogicalAnd< char, int >& operation, - const typename tnlParallelReductionLogicalAnd< char, int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< char, int > :: ResultType& result ); - -extern template bool 
reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, int > > - ( const tnlParallelReductionLogicalAnd< int, int >& operation, - const typename tnlParallelReductionLogicalAnd< int, int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, int > > - ( const tnlParallelReductionLogicalAnd< float, int >& operation, - const typename tnlParallelReductionLogicalAnd< float, int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, int > > - ( const tnlParallelReductionLogicalAnd< double, int>& operation, - const typename tnlParallelReductionLogicalAnd< double, int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, int > > - ( const tnlParallelReductionLogicalAnd< long double, int>& operation, - const typename tnlParallelReductionLogicalAnd< long double, int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< long double, int > :: DataType2* deviceInput2, - typename 
tnlParallelReductionLogicalAnd< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< char, long int > > - ( const tnlParallelReductionLogicalAnd< char, long int >& operation, - const typename tnlParallelReductionLogicalAnd< char, long int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< int, long int > > - ( const tnlParallelReductionLogicalAnd< int, long int >& operation, - const typename tnlParallelReductionLogicalAnd< int, long int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< float, long int > > - ( const tnlParallelReductionLogicalAnd< float, long int >& operation, - const typename tnlParallelReductionLogicalAnd< float, long int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< double, long int > > - ( const tnlParallelReductionLogicalAnd< double, long int>& operation, - const typename tnlParallelReductionLogicalAnd< double, long int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< 
double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalAnd< long double, long int > > - ( const tnlParallelReductionLogicalAnd< long double, long int>& operation, - const typename tnlParallelReductionLogicalAnd< long double, long int > :: IndexType size, - const typename tnlParallelReductionLogicalAnd< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalAnd< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalAnd< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Logical OR - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, int > > - ( const tnlParallelReductionLogicalOr< char, int >& operation, - const typename tnlParallelReductionLogicalOr< char, int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, int > > - ( const tnlParallelReductionLogicalOr< int, int >& operation, - const typename tnlParallelReductionLogicalOr< int, int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, int > > - ( const tnlParallelReductionLogicalOr< float, int >& 
operation, - const typename tnlParallelReductionLogicalOr< float, int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, int > > - ( const tnlParallelReductionLogicalOr< double, int>& operation, - const typename tnlParallelReductionLogicalOr< double, int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, int > > - ( const tnlParallelReductionLogicalOr< long double, int>& operation, - const typename tnlParallelReductionLogicalOr< long double, int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< char, long int > > - ( const tnlParallelReductionLogicalOr< char, long int >& operation, - const typename tnlParallelReductionLogicalOr< char, long int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< char, long int > :: ResultType& result ); - -extern template bool 
reductionOnCudaDevice< tnlParallelReductionLogicalOr< int, long int > > - ( const tnlParallelReductionLogicalOr< int, long int >& operation, - const typename tnlParallelReductionLogicalOr< int, long int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< float, long int > > - ( const tnlParallelReductionLogicalOr< float, long int >& operation, - const typename tnlParallelReductionLogicalOr< float, long int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< double, long int > > - ( const tnlParallelReductionLogicalOr< double, long int>& operation, - const typename tnlParallelReductionLogicalOr< double, long int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLogicalOr< long double, long int > > - ( const tnlParallelReductionLogicalOr< long double, long int>& operation, - const typename tnlParallelReductionLogicalOr< long double, long int > :: IndexType size, - const typename tnlParallelReductionLogicalOr< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLogicalOr< 
long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLogicalOr< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Lp Norm - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, int > > - ( const tnlParallelReductionLpNorm< float, int >& operation, - const typename tnlParallelReductionLpNorm< float, int > :: IndexType size, - const typename tnlParallelReductionLpNorm< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, int > > - ( const tnlParallelReductionLpNorm< double, int>& operation, - const typename tnlParallelReductionLpNorm< double, int > :: IndexType size, - const typename tnlParallelReductionLpNorm< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, int > > - ( const tnlParallelReductionLpNorm< long double, int>& operation, - const typename tnlParallelReductionLpNorm< long double, int > :: IndexType size, - const typename tnlParallelReductionLpNorm< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< char, long int > > - ( const tnlParallelReductionLpNorm< char, long int >& operation, - const typename tnlParallelReductionLpNorm< char, long int > :: IndexType size, - const typename 
tnlParallelReductionLpNorm< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< int, long int > > - ( const tnlParallelReductionLpNorm< int, long int >& operation, - const typename tnlParallelReductionLpNorm< int, long int > :: IndexType size, - const typename tnlParallelReductionLpNorm< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< float, long int > > - ( const tnlParallelReductionLpNorm< float, long int >& operation, - const typename tnlParallelReductionLpNorm< float, long int > :: IndexType size, - const typename tnlParallelReductionLpNorm< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< double, long int > > - ( const tnlParallelReductionLpNorm< double, long int>& operation, - const typename tnlParallelReductionLpNorm< double, long int > :: IndexType size, - const typename tnlParallelReductionLpNorm< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionLpNorm< long double, long int > > - ( const tnlParallelReductionLpNorm< long double, long int>& operation, - const typename 
tnlParallelReductionLpNorm< long double, long int > :: IndexType size, - const typename tnlParallelReductionLpNorm< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionLpNorm< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionLpNorm< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Equalities - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, int > > - ( const tnlParallelReductionEqualities< char, int >& operation, - const typename tnlParallelReductionEqualities< char, int > :: IndexType size, - const typename tnlParallelReductionEqualities< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, int > > - ( const tnlParallelReductionEqualities< int, int >& operation, - const typename tnlParallelReductionEqualities< int, int > :: IndexType size, - const typename tnlParallelReductionEqualities< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, int > > - ( const tnlParallelReductionEqualities< float, int >& operation, - const typename tnlParallelReductionEqualities< float, int > :: IndexType size, - const typename tnlParallelReductionEqualities< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, int > > - ( const 
tnlParallelReductionEqualities< double, int>& operation, - const typename tnlParallelReductionEqualities< double, int > :: IndexType size, - const typename tnlParallelReductionEqualities< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, int > > - ( const tnlParallelReductionEqualities< long double, int>& operation, - const typename tnlParallelReductionEqualities< long double, int > :: IndexType size, - const typename tnlParallelReductionEqualities< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< char, long int > > - ( const tnlParallelReductionEqualities< char, long int >& operation, - const typename tnlParallelReductionEqualities< char, long int > :: IndexType size, - const typename tnlParallelReductionEqualities< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< int, long int > > - ( const tnlParallelReductionEqualities< int, long int >& operation, - const typename tnlParallelReductionEqualities< int, long int > :: IndexType size, - const typename tnlParallelReductionEqualities< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< int, long int > :: DataType2* deviceInput2, - typename 
tnlParallelReductionEqualities< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< float, long int > > - ( const tnlParallelReductionEqualities< float, long int >& operation, - const typename tnlParallelReductionEqualities< float, long int > :: IndexType size, - const typename tnlParallelReductionEqualities< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< double, long int > > - ( const tnlParallelReductionEqualities< double, long int>& operation, - const typename tnlParallelReductionEqualities< double, long int > :: IndexType size, - const typename tnlParallelReductionEqualities< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionEqualities< long double, long int > > - ( const tnlParallelReductionEqualities< long double, long int>& operation, - const typename tnlParallelReductionEqualities< long double, long int > :: IndexType size, - const typename tnlParallelReductionEqualities< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionEqualities< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionEqualities< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Inequalities - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, int > > - ( const tnlParallelReductionInequalities< char, int >& operation, - const typename tnlParallelReductionInequalities< 
char, int > :: IndexType size, - const typename tnlParallelReductionInequalities< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, int > > - ( const tnlParallelReductionInequalities< int, int >& operation, - const typename tnlParallelReductionInequalities< int, int > :: IndexType size, - const typename tnlParallelReductionInequalities< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, int > > - ( const tnlParallelReductionInequalities< float, int >& operation, - const typename tnlParallelReductionInequalities< float, int > :: IndexType size, - const typename tnlParallelReductionInequalities< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, int > > - ( const tnlParallelReductionInequalities< double, int>& operation, - const typename tnlParallelReductionInequalities< double, int > :: IndexType size, - const typename tnlParallelReductionInequalities< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, int > > - ( const 
tnlParallelReductionInequalities< long double, int>& operation, - const typename tnlParallelReductionInequalities< long double, int > :: IndexType size, - const typename tnlParallelReductionInequalities< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< char, long int > > - ( const tnlParallelReductionInequalities< char, long int >& operation, - const typename tnlParallelReductionInequalities< char, long int > :: IndexType size, - const typename tnlParallelReductionInequalities< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< int, long int > > - ( const tnlParallelReductionInequalities< int, long int >& operation, - const typename tnlParallelReductionInequalities< int, long int > :: IndexType size, - const typename tnlParallelReductionInequalities< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< float, long int > > - ( const tnlParallelReductionInequalities< float, long int >& operation, - const typename tnlParallelReductionInequalities< float, long int > :: IndexType size, - const typename tnlParallelReductionInequalities< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< float, long int > :: DataType2* 
deviceInput2, - typename tnlParallelReductionInequalities< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< double, long int > > - ( const tnlParallelReductionInequalities< double, long int>& operation, - const typename tnlParallelReductionInequalities< double, long int > :: IndexType size, - const typename tnlParallelReductionInequalities< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionInequalities< long double, long int > > - ( const tnlParallelReductionInequalities< long double, long int>& operation, - const typename tnlParallelReductionInequalities< long double, long int > :: IndexType size, - const typename tnlParallelReductionInequalities< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionInequalities< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionInequalities< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * ScalarProduct - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, int > > - ( const tnlParallelReductionScalarProduct< char, int >& operation, - const typename tnlParallelReductionScalarProduct< char, int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, int > > - ( const tnlParallelReductionScalarProduct< int, int >& operation, - const 
typename tnlParallelReductionScalarProduct< int, int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, int > > - ( const tnlParallelReductionScalarProduct< float, int >& operation, - const typename tnlParallelReductionScalarProduct< float, int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, int > > - ( const tnlParallelReductionScalarProduct< double, int>& operation, - const typename tnlParallelReductionScalarProduct< double, int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, int > > - ( const tnlParallelReductionScalarProduct< long double, int>& operation, - const typename tnlParallelReductionScalarProduct< long double, int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< long double, int> :: ResultType& result ); -#endif - -#ifdef 
INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< char, long int > > - ( const tnlParallelReductionScalarProduct< char, long int >& operation, - const typename tnlParallelReductionScalarProduct< char, long int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< int, long int > > - ( const tnlParallelReductionScalarProduct< int, long int >& operation, - const typename tnlParallelReductionScalarProduct< int, long int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< float, long int > > - ( const tnlParallelReductionScalarProduct< float, long int >& operation, - const typename tnlParallelReductionScalarProduct< float, long int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< double, long int > > - ( const tnlParallelReductionScalarProduct< double, long int>& operation, - const typename tnlParallelReductionScalarProduct< double, long int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< double, long int > :: 
DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionScalarProduct< long double, long int > > - ( const tnlParallelReductionScalarProduct< long double, long int>& operation, - const typename tnlParallelReductionScalarProduct< long double, long int > :: IndexType size, - const typename tnlParallelReductionScalarProduct< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionScalarProduct< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionScalarProduct< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Diff sum - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, int > > - ( const tnlParallelReductionDiffSum< char, int >& operation, - const typename tnlParallelReductionDiffSum< char, int > :: IndexType size, - const typename tnlParallelReductionDiffSum< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, int > > - ( const tnlParallelReductionDiffSum< int, int >& operation, - const typename tnlParallelReductionDiffSum< int, int > :: IndexType size, - const typename tnlParallelReductionDiffSum< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, int > > - ( const tnlParallelReductionDiffSum< float, int >& operation, - const typename 
tnlParallelReductionDiffSum< float, int > :: IndexType size, - const typename tnlParallelReductionDiffSum< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, int > > - ( const tnlParallelReductionDiffSum< double, int>& operation, - const typename tnlParallelReductionDiffSum< double, int > :: IndexType size, - const typename tnlParallelReductionDiffSum< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, int > > - ( const tnlParallelReductionDiffSum< long double, int>& operation, - const typename tnlParallelReductionDiffSum< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffSum< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< char, long int > > - ( const tnlParallelReductionDiffSum< char, long int >& operation, - const typename tnlParallelReductionDiffSum< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffSum< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< int, long int > > - ( 
const tnlParallelReductionDiffSum< int, long int >& operation, - const typename tnlParallelReductionDiffSum< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffSum< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< float, long int > > - ( const tnlParallelReductionDiffSum< float, long int >& operation, - const typename tnlParallelReductionDiffSum< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffSum< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< double, long int > > - ( const tnlParallelReductionDiffSum< double, long int>& operation, - const typename tnlParallelReductionDiffSum< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffSum< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffSum< long double, long int > > - ( const tnlParallelReductionDiffSum< long double, long int>& operation, - const typename tnlParallelReductionDiffSum< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffSum< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffSum< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffSum< long double, long int> :: 
ResultType& result ); -#endif -#endif - -/**** - * Diff min - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, int > > - ( const tnlParallelReductionDiffMin< char, int >& operation, - const typename tnlParallelReductionDiffMin< char, int > :: IndexType size, - const typename tnlParallelReductionDiffMin< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, int > > - ( const tnlParallelReductionDiffMin< int, int >& operation, - const typename tnlParallelReductionDiffMin< int, int > :: IndexType size, - const typename tnlParallelReductionDiffMin< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, int > > - ( const tnlParallelReductionDiffMin< float, int >& operation, - const typename tnlParallelReductionDiffMin< float, int > :: IndexType size, - const typename tnlParallelReductionDiffMin< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, int > > - ( const tnlParallelReductionDiffMin< double, int>& operation, - const typename tnlParallelReductionDiffMin< double, int > :: IndexType size, - const typename tnlParallelReductionDiffMin< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< double, int> :: ResultType& result ); - -#ifdef 
INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, int > > - ( const tnlParallelReductionDiffMin< long double, int>& operation, - const typename tnlParallelReductionDiffMin< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffMin< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< char, long int > > - ( const tnlParallelReductionDiffMin< char, long int >& operation, - const typename tnlParallelReductionDiffMin< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffMin< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< int, long int > > - ( const tnlParallelReductionDiffMin< int, long int >& operation, - const typename tnlParallelReductionDiffMin< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffMin< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< float, long int > > - ( const tnlParallelReductionDiffMin< float, long int >& operation, - const typename tnlParallelReductionDiffMin< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffMin< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< float, long int > :: 
DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< double, long int > > - ( const tnlParallelReductionDiffMin< double, long int>& operation, - const typename tnlParallelReductionDiffMin< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffMin< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMin< long double, long int > > - ( const tnlParallelReductionDiffMin< long double, long int>& operation, - const typename tnlParallelReductionDiffMin< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffMin< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMin< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMin< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Diff max - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, int > > - ( const tnlParallelReductionDiffMax< char, int >& operation, - const typename tnlParallelReductionDiffMax< char, int > :: IndexType size, - const typename tnlParallelReductionDiffMax< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, int > > - ( const tnlParallelReductionDiffMax< int, int >& operation, - const typename tnlParallelReductionDiffMax< int, int > :: IndexType size, - const typename 
tnlParallelReductionDiffMax< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, int > > - ( const tnlParallelReductionDiffMax< float, int >& operation, - const typename tnlParallelReductionDiffMax< float, int > :: IndexType size, - const typename tnlParallelReductionDiffMax< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, int > > - ( const tnlParallelReductionDiffMax< double, int>& operation, - const typename tnlParallelReductionDiffMax< double, int > :: IndexType size, - const typename tnlParallelReductionDiffMax< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, int > > - ( const tnlParallelReductionDiffMax< long double, int>& operation, - const typename tnlParallelReductionDiffMax< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffMax< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< char, long int > > - ( const tnlParallelReductionDiffMax< char, long int >& operation, - const typename tnlParallelReductionDiffMax< 
char, long int > :: IndexType size, - const typename tnlParallelReductionDiffMax< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< int, long int > > - ( const tnlParallelReductionDiffMax< int, long int >& operation, - const typename tnlParallelReductionDiffMax< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffMax< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< float, long int > > - ( const tnlParallelReductionDiffMax< float, long int >& operation, - const typename tnlParallelReductionDiffMax< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffMax< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< double, long int > > - ( const tnlParallelReductionDiffMax< double, long int>& operation, - const typename tnlParallelReductionDiffMax< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffMax< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffMax< long double, long int > > - ( const 
tnlParallelReductionDiffMax< long double, long int>& operation, - const typename tnlParallelReductionDiffMax< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffMax< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffMax< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffMax< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Diff abs sum - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, int > > - ( const tnlParallelReductionDiffAbsSum< char, int >& operation, - const typename tnlParallelReductionDiffAbsSum< char, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, int > > - ( const tnlParallelReductionDiffAbsSum< int, int >& operation, - const typename tnlParallelReductionDiffAbsSum< int, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, int > > - ( const tnlParallelReductionDiffAbsSum< float, int >& operation, - const typename tnlParallelReductionDiffAbsSum< float, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< float, int> :: ResultType& result ); - -extern template bool 
reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, int > > - ( const tnlParallelReductionDiffAbsSum< double, int>& operation, - const typename tnlParallelReductionDiffAbsSum< double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, int > > - ( const tnlParallelReductionDiffAbsSum< long double, int>& operation, - const typename tnlParallelReductionDiffAbsSum< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< char, long int > > - ( const tnlParallelReductionDiffAbsSum< char, long int >& operation, - const typename tnlParallelReductionDiffAbsSum< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< int, long int > > - ( const tnlParallelReductionDiffAbsSum< int, long int >& operation, - const typename tnlParallelReductionDiffAbsSum< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< int, long int > :: DataType1* deviceInput1, - const typename 
tnlParallelReductionDiffAbsSum< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< float, long int > > - ( const tnlParallelReductionDiffAbsSum< float, long int >& operation, - const typename tnlParallelReductionDiffAbsSum< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< double, long int > > - ( const tnlParallelReductionDiffAbsSum< double, long int>& operation, - const typename tnlParallelReductionDiffAbsSum< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsSum< long double, long int > > - ( const tnlParallelReductionDiffAbsSum< long double, long int>& operation, - const typename tnlParallelReductionDiffAbsSum< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsSum< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsSum< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsSum< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Diff abs min - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, int > > - ( const 
tnlParallelReductionDiffAbsMin< char, int >& operation, - const typename tnlParallelReductionDiffAbsMin< char, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, int > > - ( const tnlParallelReductionDiffAbsMin< int, int >& operation, - const typename tnlParallelReductionDiffAbsMin< int, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, int > > - ( const tnlParallelReductionDiffAbsMin< float, int >& operation, - const typename tnlParallelReductionDiffAbsMin< float, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, int > > - ( const tnlParallelReductionDiffAbsMin< double, int>& operation, - const typename tnlParallelReductionDiffAbsMin< double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< 
tnlParallelReductionDiffAbsMin< long double, int > > - ( const tnlParallelReductionDiffAbsMin< long double, int>& operation, - const typename tnlParallelReductionDiffAbsMin< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< char, long int > > - ( const tnlParallelReductionDiffAbsMin< char, long int >& operation, - const typename tnlParallelReductionDiffAbsMin< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< int, long int > > - ( const tnlParallelReductionDiffAbsMin< int, long int >& operation, - const typename tnlParallelReductionDiffAbsMin< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< float, long int > > - ( const tnlParallelReductionDiffAbsMin< float, long int >& operation, - const typename tnlParallelReductionDiffAbsMin< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< float, long int > :: 
DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< double, long int > > - ( const tnlParallelReductionDiffAbsMin< double, long int>& operation, - const typename tnlParallelReductionDiffAbsMin< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMin< long double, long int > > - ( const tnlParallelReductionDiffAbsMin< long double, long int>& operation, - const typename tnlParallelReductionDiffAbsMin< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMin< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMin< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMin< long double, long int> :: ResultType& result ); -#endif -#endif - -/**** - * Diff abs max - */ - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, int > > - ( const tnlParallelReductionDiffAbsMax< char, int >& operation, - const typename tnlParallelReductionDiffAbsMax< char, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< char, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< char, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< char, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, int > > - ( const tnlParallelReductionDiffAbsMax< int, int >& operation, - const typename tnlParallelReductionDiffAbsMax< 
int, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< int, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< int, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< int, int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, int > > - ( const tnlParallelReductionDiffAbsMax< float, int >& operation, - const typename tnlParallelReductionDiffAbsMax< float, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, int > > - ( const tnlParallelReductionDiffAbsMax< double, int>& operation, - const typename tnlParallelReductionDiffAbsMax< double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, int > > - ( const tnlParallelReductionDiffAbsMax< long double, int>& operation, - const typename tnlParallelReductionDiffAbsMax< long double, int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< char, long int > > - 
( const tnlParallelReductionDiffAbsMax< char, long int >& operation, - const typename tnlParallelReductionDiffAbsMax< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< int, long int > > - ( const tnlParallelReductionDiffAbsMax< int, long int >& operation, - const typename tnlParallelReductionDiffAbsMax< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< float, long int > > - ( const tnlParallelReductionDiffAbsMax< float, long int >& operation, - const typename tnlParallelReductionDiffAbsMax< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< double, long int > > - ( const tnlParallelReductionDiffAbsMax< double, long int>& operation, - const typename tnlParallelReductionDiffAbsMax< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< double, long int> :: 
ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffAbsMax< long double, long int > > - ( const tnlParallelReductionDiffAbsMax< long double, long int>& operation, - const typename tnlParallelReductionDiffAbsMax< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffAbsMax< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffAbsMax< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffAbsMax< long double, long int> :: ResultType& result ); -#endif -#endif - - -/**** - * Diff Lp Norm - */ -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, int > > - ( const tnlParallelReductionDiffLpNorm< float, int >& operation, - const typename tnlParallelReductionDiffLpNorm< float, int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< float, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< float, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< float, int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, int > > - ( const tnlParallelReductionDiffLpNorm< double, int>& operation, - const typename tnlParallelReductionDiffLpNorm< double, int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< double, int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, int > > - ( const tnlParallelReductionDiffLpNorm< long double, int>& operation, - const typename tnlParallelReductionDiffLpNorm< long double, int > :: IndexType size, - const typename 
tnlParallelReductionDiffLpNorm< long double, int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< long double, int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< long double, int> :: ResultType& result ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< char, long int > > - ( const tnlParallelReductionDiffLpNorm< char, long int >& operation, - const typename tnlParallelReductionDiffLpNorm< char, long int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< char, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< char, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< char, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< int, long int > > - ( const tnlParallelReductionDiffLpNorm< int, long int >& operation, - const typename tnlParallelReductionDiffLpNorm< int, long int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< int, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< int, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< int, long int > :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< float, long int > > - ( const tnlParallelReductionDiffLpNorm< float, long int >& operation, - const typename tnlParallelReductionDiffLpNorm< float, long int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< float, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< float, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< float, long int> :: ResultType& result ); - -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< double, long int > > - ( const 
tnlParallelReductionDiffLpNorm< double, long int>& operation, - const typename tnlParallelReductionDiffLpNorm< double, long int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< double, long int> :: ResultType& result ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool reductionOnCudaDevice< tnlParallelReductionDiffLpNorm< long double, long int > > - ( const tnlParallelReductionDiffLpNorm< long double, long int>& operation, - const typename tnlParallelReductionDiffLpNorm< long double, long int > :: IndexType size, - const typename tnlParallelReductionDiffLpNorm< long double, long int > :: DataType1* deviceInput1, - const typename tnlParallelReductionDiffLpNorm< long double, long int > :: DataType2* deviceInput2, - typename tnlParallelReductionDiffLpNorm< long double, long int> :: ResultType& result ); -#endif -#endif - -#endif /* TEMPLATE_EXPLICIT_INSTANTIATION */ - } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h index b1bc4dec94817845791641d9a59b9d5bb43ded35..b87d633e1c919214a9c0ff4c27f2b341f383b501 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h @@ -49,14 +49,11 @@ getVectorMax( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result( 0 ); Algorithms::ParallelReductionMax< RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -68,14 +65,11 @@ getVectorMin( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result( 0 ); Algorithms::ParallelReductionMin< RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -87,14 +81,11 @@ getVectorAbsMax( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result( 0 ); Algorithms::ParallelReductionAbsMax< RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -106,14 +97,11 @@ getVectorAbsMin( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result( 0 ); Algorithms::ParallelReductionAbsMin< RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -125,14 +113,11 @@ getVectorL1Norm( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result( 0 ); Algorithms::ParallelReductionAbsSum< RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( RealType* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -144,13 +129,11 @@ getVectorL2Norm( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result( 0 ); Algorithms::ParallelReductionL2Norm< Real, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0, - result ); + const ResultType result = Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); return std::sqrt( result ); } @@ -169,14 +152,13 @@ getVectorLpNorm( const Vector& v, return getVectorL1Norm< Vector, ResultType >( v ); if( p == 2 ) return getVectorL2Norm< Vector, ResultType >( v ); - ResultType result( 0 ); + Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; operation.setPower( p ); - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0, - result ); + const ResultType result = Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); return std::pow( result, 1.0 / p ); } @@ -189,14 +171,11 @@ getVectorSum( const Vector& v ) TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result( 0 ); Algorithms::ParallelReductionSum< Real, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v.getSize(), - v.getData(), - ( Real* ) 0, - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -208,14 +187,11 @@ getVectorDifferenceMax( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result( 0 ); Algorithms::ParallelReductionDiffMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -227,14 +203,11 @@ getVectorDifferenceMin( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0 ); Algorithms::ParallelReductionDiffMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } @@ -247,14 +220,11 @@ getVectorDifferenceAbsMax( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0 ); Algorithms::ParallelReductionDiffAbsMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -266,14 +236,11 @@ getVectorDifferenceAbsMin( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result( 0 ); Algorithms::ParallelReductionDiffAbsMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -285,14 +252,11 @@ getVectorDifferenceL1Norm( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0 ); Algorithms::ParallelReductionDiffAbsSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -301,18 +265,14 @@ VectorOperations< Devices::Cuda >:: getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::RealType Real; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result( 0 ); Algorithms::ParallelReductionDiffL2Norm< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); + const ResultType result = Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); return std::sqrt( result ); } @@ -323,20 +283,21 @@ getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) { - typedef typename Vector1::RealType Real; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); - ResultType result( 0 ); + if( p == 1.0 ) + return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 ); + if( p == 2.0 ) + return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); + Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; operation.setPower( p ); - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); + const ResultType result = Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); return std::pow( result, 1.0 / p ); } @@ -346,19 +307,14 @@ VectorOperations< Devices::Cuda >:: getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::RealType Real; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result( 0 ); Algorithms::ParallelReductionDiffSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } #ifdef HAVE_CUDA @@ -412,14 +368,11 @@ getScalarProduct( const Vector1& v1, TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0 ); Algorithms::ParallelReductionScalarProduct< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; - Reduction< Devices::Cuda >::reduce( operation, - v1.getSize(), - v1.getData(), - v2.getData(), - result ); - return result; + return Reduction< Devices::Cuda >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } #ifdef HAVE_CUDA diff --git a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h index ef938886ed93821d1cd071402c2ac85b66e22c42..5f51938afb301361737a307a9b51ccb59a7f288a 100644 --- a/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h +++ b/src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h @@ -46,18 +46,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorMax( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType RealType; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result = v.getElement( 0 ); - const Index n = v.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = max( result, v.getElement( i ) ); - return result; + Algorithms::ParallelReductionMax< RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -65,18 +62,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorMin( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType RealType; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result = v.getElement( 0 ); - const Index n = v.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = min( result, v.getElement( i ) ); - return result; + Algorithms::ParallelReductionMin< RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -84,18 +78,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorAbsMax( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType RealType; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result = TNL::abs( v.getElement( 0 ) ); - const Index n = v.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = max( result, (ResultType) TNL::abs( v.getElement( i ) ) ); - return result; + Algorithms::ParallelReductionAbsMax< RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } @@ -104,18 +95,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorAbsMin( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType RealType; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - ResultType result = TNL::abs( v.getElement( 0 ) ); - const Index n = v.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = min( result, (ResultType) TNL::abs( v.getElement( i ) ) ); - return result; + Algorithms::ParallelReductionAbsMin< RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -123,18 +111,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorL1Norm( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType RealType; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result( 0.0 ); - const Index n = v.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += TNL::abs( v[ i ] ); - return result; + Algorithms::ParallelReductionAbsSum< RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( RealType* ) 0 ); } template< typename Vector, typename ResultType > @@ -143,60 +128,15 @@ VectorOperations< Devices::Host >:: getVectorL2Norm( const Vector& v ) { typedef typename Vector::RealType Real; - typedef typename Vector::IndexType Index; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); - const Index n = v.getSize(); - -#ifdef OPTIMIZED_VECTOR_HOST_OPERATIONS -#ifdef __GNUC__ - // We need to get the address of the first element to avoid - // bounds checking in TNL::Array::operator[] - const Real* V = v.getData(); -#endif - - ResultType result1 = 0, result2 = 0, result3 = 0, result4 = 0; - Index i = 0; - const Index unroll_limit = n - n % 4; -#ifdef HAVE_OPENMP -#pragma omp parallel for \ - if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) \ - reduction(+:result1,result2,result3,result4) \ - lastprivate(i) -#endif - for( i = 0; i < unroll_limit; i += 4 ) - { -#ifdef __GNUC__ - __builtin_prefetch(V + i + PrefetchDistance, 0, 0); -#endif - result1 += v[ i ] * v[ i ]; - result2 += v[ i + 1 ] * v[ i + 1 ]; - result3 += v[ i + 2 ] * v[ i + 2 ]; - result4 += v[ i + 3 ] * v[ i + 3 ]; - } - - while( i < n ) - { - result1 += v[ i ] * v[ i ]; - i++; - } - - return std::sqrt(result1 + result2 + result3 + result4); - -#else // OPTIMIZED_VECTOR_HOST_OPERATIONS - - ResultType result( 0.0 ); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: 
check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - { - const Real& aux = v[ i ]; - result += aux * aux; - } + Algorithms::ParallelReductionL2Norm< Real, ResultType > operation; + const ResultType result = Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); return std::sqrt( result ); -#endif // OPTIMIZED_VECTOR_HOST_OPERATIONS } template< typename Vector, typename ResultType, typename Real_ > @@ -205,7 +145,7 @@ VectorOperations< Devices::Host >:: getVectorLpNorm( const Vector& v, const Real_ p ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType Real; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." ); @@ -215,13 +155,12 @@ getVectorLpNorm( const Vector& v, if( p == 2.0 ) return getVectorL2Norm< Vector, ResultType >( v ); - ResultType result( 0.0 ); - const Index n = v.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += std::pow( TNL::abs( v[ i ] ), p ); + Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; + operation.setPower( p ); + const ResultType result = Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); return std::pow( result, 1.0 / p ); } @@ -230,18 +169,15 @@ ResultType VectorOperations< Devices::Host >:: getVectorSum( const Vector& v ) { - typedef typename Vector::IndexType Index; + typedef typename Vector::RealType Real; TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." 
); - ResultType result( 0.0 ); - const Index n = v.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += v[ i ]; - return result; + Algorithms::ParallelReductionSum< Real, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v.getSize(), + v.getData(), + ( Real* ) 0 ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -250,19 +186,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result = v1.getElement( 0 ) - v2.getElement( 0 ); - const Index n = v1.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = max( result, v1.getElement( i ) - v2.getElement( i ) ); - return result; + Algorithms::ParallelReductionDiffMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -271,19 +202,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result = v1.getElement( 0 ) - v2.getElement( 0 ); - const Index n = v1.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = min( result, v1.getElement( i ) - v2.getElement( i ) ); - return result; + Algorithms::ParallelReductionDiffMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -292,19 +218,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result = TNL::abs( v1.getElement( 0 ) - v2.getElement( 0 ) ); - const Index n = v1.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(max:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = max( result, (ResultType) TNL::abs( v1.getElement( i ) - v2.getElement( i ) ) ); - return result; + Algorithms::ParallelReductionDiffAbsMax< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -313,19 +234,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." 
); - ResultType result = TNL::abs( v1[ 0 ] - v2[ 0 ] ); - const Index n = v1.getSize(); -#if defined( HAVE_OPENMP ) && _OPENMP >= 201107 // OpenMP 3.1 added support for min/max reduction operations -#pragma omp parallel for reduction(min:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 1; i < n; i ++ ) - result = min( result, (ResultType) TNL::abs( v1[ i ] - v2[ i ] ) ); - return result; + Algorithms::ParallelReductionDiffAbsMin< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -334,19 +250,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0.0 ); - const Index n = v1.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += TNL::abs( v1[ i ] - v2[ i ] ); - return result; + Algorithms::ParallelReductionDiffAbsSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2, typename ResultType > @@ -355,21 +266,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." 
); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0.0 ); - const Index n = v1.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - { - ResultType aux = TNL::abs( v1[ i ] - v2[ i ] ); - result += aux * aux; - } + Algorithms::ParallelReductionDiffL2Norm< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + const ResultType result = Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); return std::sqrt( result ); } @@ -381,8 +285,6 @@ getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." 
); @@ -392,13 +294,12 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); - ResultType result( 0.0 ); - const Index n = v1.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += std::pow( TNL::abs( v1.getElement( i ) - v2.getElement( i ) ), p ); + Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; + operation.setPower( p ); + const ResultType result = Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); return std::pow( result, 1.0 / p ); } @@ -408,19 +309,14 @@ VectorOperations< Devices::Host >:: getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - ResultType result( 0.0 ); - const Index n = v1.getSize(); -#ifdef HAVE_OPENMP -#pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() &&n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i ++ ) - result += v1.getElement( i ) - v2.getElement( i ); - return result; + Algorithms::ParallelReductionDiffSum< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } @@ -449,60 +345,14 @@ VectorOperations< Devices::Host >:: getScalarProduct( const Vector1& v1, const Vector2& v2 ) { - typedef typename Vector1::IndexType Index; - TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." 
); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); - const Index n = v1.getSize(); - -#ifdef OPTIMIZED_VECTOR_HOST_OPERATIONS -#ifdef __GNUC__ - typedef typename Vector1::RealType Real; - // We need to get the address of the first element to avoid - // bounds checking in TNL::Array::operator[] - const Real* V1 = v1.getData(); - const Real* V2 = v2.getData(); -#endif - - ResultType dot1 = 0.0, dot2 = 0.0, dot3 = 0.0, dot4 = 0.0; - Index i = 0; - const Index unroll_limit = n - n % 4; -#ifdef HAVE_OPENMP - #pragma omp parallel for \ - if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) \ - reduction(+:dot1,dot2,dot3,dot4) \ - lastprivate(i) -#endif - for( i = 0; i < unroll_limit; i += 4 ) - { -#ifdef __GNUC__ - __builtin_prefetch(V1 + i + PrefetchDistance, 0, 0); - __builtin_prefetch(V2 + i + PrefetchDistance, 0, 0); -#endif - dot1 += v1[ i ] * v2[ i ]; - dot2 += v1[ i + 1 ] * v2[ i + 1 ]; - dot3 += v1[ i + 2 ] * v2[ i + 2 ]; - dot4 += v1[ i + 3 ] * v2[ i + 3 ]; - } - - while( i < n ) - { - dot1 += v1[ i ] * v2[ i ]; - i++; - } - - return dot1 + dot2 + dot3 + dot4; - -#else // OPTIMIZED_VECTOR_HOST_OPERATIONS - ResultType result( 0.0 ); -#ifdef HAVE_OPENMP - #pragma omp parallel for reduction(+:result) if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold -#endif - for( Index i = 0; i < n; i++ ) - result += v1[ i ] * v2[ i ]; - return result; -#endif // OPTIMIZED_VECTOR_HOST_OPERATIONS + Algorithms::ParallelReductionScalarProduct< typename Vector1::RealType, typename Vector2::RealType, ResultType > operation; + return Reduction< Devices::Host >::reduce( operation, + v1.getSize(), + v1.getData(), + v2.getData() ); } template< typename Vector1, typename Vector2 > @@ -520,42 +370,6 @@ addVector( Vector1& y, const Index n = y.getSize(); -#ifdef OPTIMIZED_VECTOR_HOST_OPERATIONS -#ifdef __GNUC__ - // We need to get the address of the first element to avoid 
- // bounds checking in TNL::Array::operator[] - typedef typename Vector1::RealType Real; - Real* Y = y.getData(); - const Real* X = x.getData(); -#endif - - Index i = 0; - const Index unroll_limit = n - n % 4; -#ifdef HAVE_OPENMP - #pragma omp parallel for \ - if( n > OpenMPVectorOperationsThreshold ) \ - lastprivate(i) -#endif - for(i = 0; i < unroll_limit; i += 4) - { -#ifdef __GNUC__ - __builtin_prefetch(&y[ i + PrefetchDistance ], 1, 0); - __builtin_prefetch(&x[ i + PrefetchDistance ], 0, 0); -#endif - y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; - y[ i + 1 ] = thisMultiplicator * y[ i + 1 ] + alpha * x[ i + 1 ]; - y[ i + 2 ] = thisMultiplicator * y[ i + 2 ] + alpha * x[ i + 2 ]; - y[ i + 3 ] = thisMultiplicator * y[ i + 3 ] + alpha * x[ i + 3 ]; - } - - while( i < n ) - { - y[i] = thisMultiplicator * y[ i ] + alpha * x[ i ]; - i++; - } - -#else // OPTIMIZED_VECTOR_HOST_OPERATIONS - if( thisMultiplicator == 1.0 ) #ifdef HAVE_OPENMP #pragma omp parallel for if( TNL::Devices::Host::isOMPEnabled() && n > OpenMPVectorOperationsThreshold ) // TODO: check this threshold @@ -568,7 +382,6 @@ addVector( Vector1& y, #endif for( Index i = 0; i < n; i ++ ) y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; -#endif // OPTIMIZED_VECTOR_HOST_OPERATIONS } template< typename Vector1, diff --git a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h index f9aa3dfae9c8e69fbd33cbe0ea087c1500b2032b..9687a7e2af910833855f649aad2e5899c816c4fa 100644 --- a/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h +++ b/src/TNL/Containers/Algorithms/cuda-prefix-sum_impl.h @@ -321,73 +321,6 @@ cudaPrefixSum( const Index size, } } -#ifdef TEMPLATE_EXPLICIT_INSTANTIATION -extern template bool cudaPrefixSum( const int size, - const int blockSize, - const int *deviceInput, - int* deviceOutput, - tnlParallelReductionSum< int, int >& operation, - const PrefixSumType prefixSumType ); - - -extern template bool cudaPrefixSum( const 
int size, - const int blockSize, - const float *deviceInput, - float* deviceOutput, - tnlParallelReductionSum< float, int >& operation, - const PrefixSumType prefixSumType ); - -extern template bool cudaPrefixSum( const int size, - const int blockSize, - const double *deviceInput, - double* deviceOutput, - tnlParallelReductionSum< double, int >& operation, - const PrefixSumType prefixSumType ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool cudaPrefixSum( const int size, - const int blockSize, - const long double *deviceInput, - long double* deviceOutput, - tnlParallelReductionSum< long double, int >& operation, - const PrefixSumType prefixSumType ); -#endif - -#ifdef INSTANTIATE_LONG_INT -extern template bool cudaPrefixSum( const long int size, - const long int blockSize, - const int *deviceInput, - int* deviceOutput, - tnlParallelReductionSum< int, long int >& operation, - const PrefixSumType prefixSumType ); - - -extern template bool cudaPrefixSum( const long int size, - const long int blockSize, - const float *deviceInput, - float* deviceOutput, - tnlParallelReductionSum< float, long int >& operation, - const PrefixSumType prefixSumType ); - -extern template bool cudaPrefixSum( const long int size, - const long int blockSize, - const double *deviceInput, - double* deviceOutput, - tnlParallelReductionSum< double, long int >& operation, - const PrefixSumType prefixSumType ); - -#ifdef INSTANTIATE_LONG_DOUBLE -extern template bool cudaPrefixSum( const long int size, - const long int blockSize, - const long double *deviceInput, - long double* deviceOutput, - tnlParallelReductionSum< long double, long int >& operation, - const PrefixSumType prefixSumType ); -#endif -#endif - -#endif - } // namespace Algorithms } // namespace Containers } // namespace TNL diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index 8b2b1ef33e64b1dbd2330d375594ada14d19eacc..7e98a0d3db82209e70e810f6773f56e12094f18a 100644 --- 
a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -93,8 +93,8 @@ public: // must match (i.e. copy-assignment cannot resize). ArrayView& operator=( const ArrayView& view ); - template< typename Value_, typename Device_, typename Index_ > - ArrayView& operator=( const ArrayView< Value_, Device_, Index_ >& view ); + template< typename Array > + ArrayView& operator=( const Array& array ); static String getType(); diff --git a/src/TNL/Containers/ArrayView_impl.h b/src/TNL/Containers/ArrayView_impl.h index d755c4c3111812df4a00f8f633c44f4736c0aefc..96d00e7dea42ae244f928f5679a1045ba78af856 100644 --- a/src/TNL/Containers/ArrayView_impl.h +++ b/src/TNL/Containers/ArrayView_impl.h @@ -123,14 +123,14 @@ operator=( const ArrayView& view ) template< typename Value, typename Device, typename Index > - template< typename Value_, typename Device_, typename Index_ > + template< typename Array > ArrayView< Value, Device, Index >& ArrayView< Value, Device, Index >:: -operator=( const ArrayView< Value_, Device_, Index_ >& view ) +operator=( const Array& array ) { - TNL_ASSERT_EQ( getSize(), view.getSize(), "The sizes of the array views must be equal, views are not resizable." ); + TNL_ASSERT_EQ( getSize(), array.getSize(), "The sizes of the array views must be equal, views are not resizable." 
); if( getSize() > 0 ) - Algorithms::ArrayOperations< Device, Device_ >::copyMemory( getData(), view.getData(), getSize() ); + Algorithms::ArrayOperations< Device, typename Array::DeviceType >::copyMemory( getData(), array.getData(), getSize() ); return *this; } diff --git a/src/TNL/Containers/VectorView.h b/src/TNL/Containers/VectorView.h index 4ef863aa90dc27c69aa14b119414f8fa0b0dddbd..fe5a650ef3a4a2711d5f46be27c14d97031d472b 100644 --- a/src/TNL/Containers/VectorView.h +++ b/src/TNL/Containers/VectorView.h @@ -42,14 +42,24 @@ public: #ifndef __NVCC__ using BaseType::ArrayView; #else - // workaround for a bug in nvcc 8.0 (seems to be fixed in 9.0) + // workaround for nvcc 8.0, otherwise the templated constructor below fails + // (works fine in nvcc 9.0) using ArrayView< Real, Device, Index >::ArrayView; #endif + // inherit all ArrayView's assignment operators + using BaseType::operator=; + + // In C++14, default constructors cannot be inherited, although Clang + // and GCC since version 7.0 inherit them. 
+ // https://stackoverflow.com/a/51854172 + __cuda_callable__ + VectorView() = default; + // initialization by base class is not a copy constructor so it has to be explicit - template< typename Element_ > // template catches both const and non-const qualified Element + template< typename Real_ > // template catches both const and non-const qualified Element __cuda_callable__ - VectorView( const ArrayView< Element_, Device, Index >& view ) + VectorView( const ArrayView< Real_, Device, Index >& view ) : BaseType::ArrayView( view ) {} diff --git a/src/TNL/Devices/Cuda.cu b/src/TNL/Devices/Cuda.cu index 2605e6dca83290eb59db54618b7bf91ed1e59150..c1e5248330d0b46d23258a200238756c36aa314d 100644 --- a/src/TNL/Devices/Cuda.cu +++ b/src/TNL/Devices/Cuda.cu @@ -103,11 +103,10 @@ void Cuda::printThreadsSetup( const dim3& blockSize, } -bool Cuda::checkDevice( const char* file_name, int line, cudaError error ) -{ - if( error == cudaSuccess ) - return true; - throw Exceptions::CudaRuntimeError( error, file_name, line ); +void Cuda::checkDevice( const char* file_name, int line, cudaError error ) +{ + if( error != cudaSuccess ) + throw Exceptions::CudaRuntimeError( error, file_name, line ); } std::ostream& operator << ( std::ostream& str, const dim3& d ) diff --git a/src/TNL/Devices/Cuda.h b/src/TNL/Devices/Cuda.h index c73e327e9ac84ab10752a66e783a46da1b288c72..123d3a96c6d940be44208785fb5c776586a0d52d 100644 --- a/src/TNL/Devices/Cuda.h +++ b/src/TNL/Devices/Cuda.h @@ -153,9 +153,9 @@ class Cuda * of calling cudaGetLastError() inside the method. * We recommend to use macro 'TNL_CHECK_CUDA_DEVICE' defined bellow. 
*/ - static bool checkDevice( const char* file_name, int line, cudaError error ); + static void checkDevice( const char* file_name, int line, cudaError error ); #else - static bool checkDevice() { return false; }; + static void checkDevice() {} #endif static void configSetup( Config::ConfigDescription& config, const String& prefix = "" ); diff --git a/src/TNL/Exceptions/CMakeLists.txt b/src/TNL/Exceptions/CMakeLists.txt index 5af96a6b5ed1e9a2078da708a30a8e4bdf4c6159..28ffbdf4fa12617b8e515ca59f04843b8af1a45c 100644 --- a/src/TNL/Exceptions/CMakeLists.txt +++ b/src/TNL/Exceptions/CMakeLists.txt @@ -3,7 +3,6 @@ SET( headers CudaBadAlloc.h CudaSupportMissing.h MICBadAlloc.h MICSupportMissing.h - MPISupportMissing.h - UnsupportedDimension.h ) + MPISupportMissing.h ) INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/Exceptions ) diff --git a/src/TNL/Exceptions/MPISupportMissing.h b/src/TNL/Exceptions/MPISupportMissing.h index 01a08e8973f8d30b75dd3f70a1de0598d6ccd5c5..0cbe7357da60a4c3f3a435d33aaf81def7b3a9ad 100644 --- a/src/TNL/Exceptions/MPISupportMissing.h +++ b/src/TNL/Exceptions/MPISupportMissing.h @@ -1,15 +1,13 @@ /*************************************************************************** MPISupportMissing.h - description ------------------- - begin : Jun 11, 2017 + begin : Jun 11, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -// Implemented by: Jakub Klinkovsky - #pragma once #include <stdexcept> diff --git a/src/TNL/Exceptions/UnsupportedDimension.h b/src/TNL/Exceptions/UnsupportedDimension.h deleted file mode 100644 index cdb9197370cf24376e41922d1d52364d0fc115aa..0000000000000000000000000000000000000000 --- a/src/TNL/Exceptions/UnsupportedDimension.h +++ /dev/null @@ -1,34 +0,0 @@ -/*************************************************************************** - UnsupportedDimension.h - description - ------------------- - begin : Aug 14, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -// Implemented by: Jakub Klinkovsky - -#pragma once - -namespace TNL { -namespace Exceptions { - -struct UnsupportedDimension -{ - UnsupportedDimension( int Dimension ) - : Dimension( Dimension ) - { - } - - const char* what() const throw() - { - return "This dimension is not supported (yet)."; - } - - int Dimension; -}; - -} // namespace Exceptions -} // namespace TNL diff --git a/src/TNL/File_impl.h b/src/TNL/File_impl.h index 0b7d18ad3bb04d4ebca507a021e60825c7c04f19..a27250242144156fdf156b3d889cda3420b069e0 100644 --- a/src/TNL/File_impl.h +++ b/src/TNL/File_impl.h @@ -11,6 +11,7 @@ #pragma once #include <type_traits> +#include <memory> #include <TNL/File.h> #include <TNL/Exceptions/CudaSupportMissing.h> @@ -89,35 +90,27 @@ bool File::read_impl( Type* buffer, this->readElements = 0; const std::size_t host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / sizeof( Type ), elements ); using BaseType = typename std::remove_cv< Type >::type; - BaseType* host_buffer = new BaseType[ host_buffer_size ]; + std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size 
] }; while( readElements < elements ) { std::size_t transfer = std::min( elements - readElements, host_buffer_size ); - std::size_t transfered = std::fread( host_buffer, sizeof( Type ), transfer, file ); + std::size_t transfered = std::fread( host_buffer.get(), sizeof( Type ), transfer, file ); if( transfered != transfer ) { std::cerr << "I am not able to read the data from the file " << fileName << "." << std::endl; std::cerr << transfered << " bytes were transfered. " << std::endl; std::perror( "Fread ended with the error code" ); - delete[] host_buffer; return false; } - cudaMemcpy( ( void* ) & ( buffer[ readElements ] ), - host_buffer, + cudaMemcpy( (void*) &buffer[ readElements ], + (void*) host_buffer.get(), transfer * sizeof( Type ), cudaMemcpyHostToDevice ); - if( ! TNL_CHECK_CUDA_DEVICE ) - { - std::cerr << "Transfer of data from the CUDA device to the file " << this->fileName - << " failed." << std::endl; - delete[] host_buffer; - return false; - } + TNL_CHECK_CUDA_DEVICE; this->readElements += transfer; } - delete[] host_buffer; return true; #else throw Exceptions::CudaSupportMissing(); @@ -233,35 +226,27 @@ bool File::write_impl( const Type* buffer, const std::size_t host_buffer_size = std::min( FileGPUvsCPUTransferBufferSize / sizeof( Type ), elements ); using BaseType = typename std::remove_cv< Type >::type; - BaseType* host_buffer = new BaseType[ host_buffer_size ]; + std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] }; while( this->writtenElements < elements ) { std::size_t transfer = std::min( elements - this->writtenElements, host_buffer_size ); - cudaMemcpy( host_buffer, - ( void* ) & ( buffer[ this->writtenElements ] ), + cudaMemcpy( (void*) host_buffer.get(), + (void*) &buffer[ this->writtenElements ], transfer * sizeof( Type ), cudaMemcpyDeviceToHost ); - if( ! TNL_CHECK_CUDA_DEVICE ) - { - std::cerr << "Transfer of data from the file " << this->fileName - << " to the CUDA device failed." 
<< std::endl; - delete[] host_buffer; - return false; - } - if( std::fwrite( host_buffer, + TNL_CHECK_CUDA_DEVICE; + if( std::fwrite( host_buffer.get(), sizeof( Type ), transfer, this->file ) != transfer ) { std::cerr << "I am not able to write the data to the file " << fileName << "." << std::endl; std::perror( "Fwrite ended with the error code" ); - delete[] host_buffer; return false; } this->writtenElements += transfer; } - delete[] host_buffer; return true; #else throw Exceptions::CudaSupportMissing(); diff --git a/src/TNL/Functions/TestFunction_impl.h b/src/TNL/Functions/TestFunction_impl.h index 5a7e76485339306c4e506202d3574d59571e6b45..3e7da8c33ddc00b236f75cfac25646741eb0e78d 100644 --- a/src/TNL/Functions/TestFunction_impl.h +++ b/src/TNL/Functions/TestFunction_impl.h @@ -137,8 +137,7 @@ setupFunction( const Config::ParameterContainer& parameters, { this->function = Devices::Cuda::passToDevice( *auxFunction ); delete auxFunction; - if( ! TNL_CHECK_CUDA_DEVICE ) - return false; + TNL_CHECK_CUDA_DEVICE; } return true; } @@ -167,8 +166,7 @@ setupOperator( const Config::ParameterContainer& parameters, { this->operator_ = Devices::Cuda::passToDevice( *auxOperator ); delete auxOperator; - if( ! 
TNL_CHECK_CUDA_DEVICE ) - return false; + TNL_CHECK_CUDA_DEVICE; } return true; } diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h index 2d26fe9954cc5c21540485997072856c8c316f3d..f5fdc767346f9fbc90236f04ab000f3fdc20327b 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/AdEllpack.h @@ -83,6 +83,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef AdEllpack< Real, Device, Index > ThisType; typedef AdEllpack< Real, Devices::Host, Index > HostType; typedef AdEllpack< Real, Devices::Cuda, Index > CudaType; @@ -93,7 +94,7 @@ public: String getTypeVirtual() const; - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getWarp( const IndexType row ) const; @@ -155,7 +156,7 @@ public: void print( std::ostream& str ) const; bool balanceLoad( const RealType average, - const CompressedRowLengthsVector& rowLengths, + ConstCompressedRowLengthsVectorView rowLengths, warpList* list ); void computeWarps( const IndexType SMs, @@ -166,7 +167,7 @@ public: void performRowTest(); - void performRowLengthsTest( const CompressedRowLengthsVector& rowLengths ); + void performRowLengthsTest( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getTotalLoad() const; diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h index 55719e362dc14577ee683f7b49c71e6006ea6e51..daab3b8cdc351459120436186cb3060aeee54d8f 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/AdEllpack_impl.h @@ -182,7 +182,7 @@ template< typename Real, typename Index > void AdEllpack< Real, Device, Index >:: -setCompressedRowLengths( const 
CompressedRowLengthsVector& rowLengths ) +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT( this->getRows() > 0, ); TNL_ASSERT( this->getColumns() > 0, ); @@ -250,7 +250,7 @@ Index AdEllpack< Real, Device, Index >::getTotalLoad() const template< typename Real, typename Device, typename Index > -void AdEllpack< Real, Device, Index >::performRowLengthsTest( const CompressedRowLengthsVector& rowLengths ) +void AdEllpack< Real, Device, Index >::performRowLengthsTest( ConstCompressedRowLengthsVectorView rowLengths ) { bool found = false; for( IndexType row = 0; row < this->getRows(); row++ ) @@ -318,6 +318,8 @@ Index AdEllpack< Real, Device, Index >::getWarp( const IndexType row ) const ( ( this->rowOffset.getElement( searchedWarp ) < row ) && ( this->rowOffset.getElement( searchedWarp + 1 ) >= row ) ) ) return searchedWarp; } + // FIXME: non-void function always has to return something sensible + throw "bug - row was not found"; } template< typename Real, @@ -474,7 +476,6 @@ bool AdEllpack< Real, Device, Index >::setRow( const IndexType row, warp++; bool found = false; - IndexType length = 0; IndexType elementPtr; IndexType elPtr = 0; while( ( !found ) && ( elPtr < elements ) ) @@ -694,8 +695,8 @@ template< typename Real, typename Device, typename Index > bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average, - const CompressedRowLengthsVector& rowLengths, - warpList* list ) + ConstCompressedRowLengthsVectorView rowLengths, + warpList* list ) { IndexType offset, rowOffset, localLoad, reduceMap[ 32 ]; diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/BiEllpack.h index ef5f90d47397c4e8314e5dc64b54c51f60a8f013..b724a0ada17dc4efb2c1d0907054715107da01a4 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/BiEllpack.h @@ -36,6 +36,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector 
CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef BiEllpack< Real, Device, Index > ThisType; @@ -51,7 +52,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/BiEllpackSymmetric.h index c8d84bbc530ec30b13ce1e4953f972574ba10552..e44921fe837e7d0f0060df78af167c12ea1c102e 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric.h +++ b/src/TNL/Matrices/BiEllpackSymmetric.h @@ -27,6 +27,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef BiEllpackSymmetric< Real, Device, Index > ThisType; @@ -41,7 +42,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/BiEllpackSymmetric_impl.h index 
9a7f380eebe7c5b54785055060594967275faa65..5b6f94b57b571963f5ba18c3c4c7e8fb7700fa99 100644 --- a/src/TNL/Matrices/BiEllpackSymmetric_impl.h +++ b/src/TNL/Matrices/BiEllpackSymmetric_impl.h @@ -96,7 +96,7 @@ template< typename Real, typename Device, typename Index, int StripSize > -void BiEllpackSymmetric< Real, Device, Index, StripSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void BiEllpackSymmetric< Real, Device, Index, StripSize >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { if( this->getRows() % this->warpSize != 0 ) this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) ); @@ -108,7 +108,7 @@ void BiEllpackSymmetric< Real, Device, Index, StripSize >::setCompressedRowLengt for( IndexType i = 0; i < this->groupPointers.getSize(); i++ ) this->groupPointers.setElement( i, 0 ); - // TODO: fix this + // FIXME: cannot sort a const vector! //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); @@ -149,7 +149,7 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c << " this->getName() = " << std::endl ); IndexType strip = row / this->warpSize; - IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; + IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) @@ -158,6 +158,12 @@ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getNumberOfGroups( c return ( numberOfGroups - i ); bisection *= 2; } + // FIXME: non-void function always has to return something sensible +#ifndef __CUDA_ARCH__ + throw "bug - row was not found"; +#else + TNL_ASSERT_TRUE( false, "bug - row was not found" ); +#endif } template< typename Real, @@ -390,6 +396,7 @@ bool BiEllpackSymmetric< 
Real, Device, Index, StripSize >::setRow( const IndexTy } if( thisElementPtr == numberOfElements ) return true; + return false; } template< typename Real, @@ -576,8 +583,8 @@ __cuda_callable__ Index BiEllpackSymmetric< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { - return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); + return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] + - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h index 80b182db1e61cb73d4a82a20d5772935f06436e0..ea5e1efb9463915ec724b28b38abbfe64ac596b0 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/BiEllpack_impl.h @@ -99,7 +99,7 @@ template< typename Real, int StripSize > void BiEllpack< Real, Device, Index, StripSize >:: -setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { if( this->getRows() % this->warpSize != 0 ) this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) ); @@ -112,8 +112,9 @@ setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) for( IndexType i = 0; i < this->groupPointers.getSize(); i++ ) this->groupPointers.setElement( i, 0 ); - DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); - DeviceDependentCode::computeColumnSizes( *this, rowLengths ); + // FIXME: cannot sort a const vector! 
+ //DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); + //DeviceDependentCode::computeColumnSizes( *this, rowLengths ); this->groupPointers.computeExclusivePrefixSum(); @@ -153,7 +154,7 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde << " this->getName() = " << std::endl; ); IndexType strip = row / this->warpSize; - IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; + IndexType rowStripPermutation = this->rowPermArray[ row ] - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) @@ -162,6 +163,12 @@ Index BiEllpack< Real, Device, Index, StripSize >::getNumberOfGroups( const Inde return ( numberOfGroups - i ); bisection *= 2; } + // FIXME: non-void function always has to return something sensible +#ifndef __CUDA_ARCH__ + throw "bug - row was not found"; +#else + TNL_ASSERT_TRUE( false, "bug - row was not found" ); +#endif } template< typename Real, @@ -396,6 +403,7 @@ setRow( const IndexType row, } if( thisElementPtr == numberOfElements ) return true; + return false; } template< typename Real, @@ -580,8 +588,8 @@ __cuda_callable__ Index BiEllpack< Real, Device, Index, StripSize >::getGroupLength( const Index strip, const Index group ) const { - return this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group + 1 ) - - this->groupPointers.getElement( strip * ( this->logWarpSize + 1 ) + group ); + return this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group + 1 ] + - this->groupPointers[ strip * ( this->logWarpSize + 1 ) + group ]; } template< typename Real, diff --git a/src/TNL/Matrices/COOMatrix.h b/src/TNL/Matrices/COOMatrix.h index a74a12f0496d3296315d5f308c43d1da487aa719..e67351ab9092c3ea3c504781b4409f4053601caa 100644 --- a/src/TNL/Matrices/COOMatrix.h +++ b/src/TNL/Matrices/COOMatrix.h @@ -34,6 +34,7 @@ public: typedef Device DeviceType; typedef 
Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef COOMatrix< Real, Device, Index > ThisType; typedef COOMatrix< Real, Devices::Host, Index > HostType; typedef COOMatrix< Real, Devices::Cuda, Index > CudaType; @@ -51,7 +52,7 @@ public: IndexType getNumberOfUsedValues() const; - bool setCompressedRowLengths(const CompressedRowLengthsVector& rowLengths); + bool setCompressedRowLengths(ConstCompressedRowLengthsVectorView rowLengths); void getRowLengths(Containers::Vector< IndexType, DeviceType, IndexType >& rowLengths) const; diff --git a/src/TNL/Matrices/COOMatrix_impl.h b/src/TNL/Matrices/COOMatrix_impl.h index a5fc599abf91da2da7b9447fa69631d948a9403e..05439634140745c82bfb23f2df2c0042e7b3741a 100644 --- a/src/TNL/Matrices/COOMatrix_impl.h +++ b/src/TNL/Matrices/COOMatrix_impl.h @@ -84,7 +84,7 @@ Index COOMatrix< Real, Device, Index >::getNumberOfUsedValues() const template< typename Real, typename Device, typename Index > -bool COOMatrix< Real, Device, Index >::setCompressedRowLengths(const CompressedRowLengthsVector& rowLengths) +bool COOMatrix< Real, Device, Index >::setCompressedRowLengths(ConstCompressedRowLengthsVectorView rowLengths) { IndexType size = 0; for(IndexType row = 0; row < this->getRows(); row++) diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/CSR.h index f6d4c6d31b895476e16bc2ce2616a9bc77fce505..ef7ba5d6f925d2e56e0df64b2951fe1752a7f84f 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/CSR.h @@ -45,6 +45,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView 
ConstCompressedRowLengthsVectorView; typedef CSR< Real, Device, Index > ThisType; typedef CSR< Real, Devices::Host, Index > HostType; typedef CSR< Real, Devices::Cuda, Index > CudaType; @@ -68,7 +69,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h index 1516e932231c900c6e56b5442b88133cd5267a1a..b4dff85470bf86021c69478138eb3be86f74d593 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/CSR_impl.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Matrices/CSR.h> -#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> #include <TNL/Math.h> #ifdef HAVE_CUSPARSE @@ -87,7 +87,7 @@ void CSR< Real, Device, Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index > -void CSR< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); @@ -99,7 +99,7 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLen * necessary length of the vectors this->values * and this->columnIndexes. 
*/ - Containers::Vector< IndexType, DeviceType, IndexType > rowPtrs; + Containers::VectorView< IndexType, DeviceType, IndexType > rowPtrs; rowPtrs.bind( this->rowPointers.getData(), this->getRows() ); rowPtrs = rowLengths; this->rowPointers.setElement( this->rows, 0 ); diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/ChunkedEllpack.h index 8c4a47a320b6a38fff9ab7c9622f79c3caae6b4b..35bbfa89799eff2b248283cda4ef141bcf7eb039 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/ChunkedEllpack.h @@ -77,6 +77,7 @@ public: typedef Index IndexType; typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo; typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef ChunkedEllpack< Real, Device, Index > ThisType; typedef ChunkedEllpack< Real, Devices::Host, Index > HostType; typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType; @@ -97,7 +98,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; @@ -253,9 +254,9 @@ public: protected: - void resolveSliceSizes( const Containers::Vector< Index, Devices::Host, Index >& rowLengths ); + void resolveSliceSizes( ConstCompressedRowLengthsVectorView rowLengths ); - bool setSlice( const CompressedRowLengthsVector& rowLengths, + bool setSlice( ConstCompressedRowLengthsVectorView rowLengths, const IndexType sliceIdx, IndexType& elementsToAllocation ); diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h index 1a47fe4e608fa2d5087eb404730356517316a913..20dbfa68349b7f23ad66e36c300d1894ea7e40be 100644 --- 
a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/ChunkedEllpack_impl.h @@ -95,7 +95,7 @@ void ChunkedEllpack< Real, Device, Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index > -void ChunkedEllpack< Real, Device, Index >::resolveSliceSizes( const Containers::Vector< Index, Devices::Host, Index >& rowLengths ) +void ChunkedEllpack< Real, Device, Index >::resolveSliceSizes( ConstCompressedRowLengthsVectorView rowLengths ) { /**** * Iterate over rows and allocate slices so that each slice has @@ -132,7 +132,7 @@ void ChunkedEllpack< Real, Device, Index >::resolveSliceSizes( const Containers: template< typename Real, typename Device, typename Index > -bool ChunkedEllpack< Real, Device, Index >::setSlice( const CompressedRowLengthsVector& rowLengths, +bool ChunkedEllpack< Real, Device, Index >::setSlice( ConstCompressedRowLengthsVectorView rowLengths, const IndexType sliceIndex, IndexType& elementsToAllocation ) { @@ -215,7 +215,7 @@ bool ChunkedEllpack< Real, Device, Index >::setSlice( const CompressedRowLengths template< typename Real, typename Device, typename Index > -void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); @@ -1336,7 +1336,7 @@ class ChunkedEllpackDeviceDependentCode< Devices::Host > template< typename Real, typename Index > static void resolveSliceSizes( ChunkedEllpack< Real, Device, Index >& matrix, - const typename ChunkedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths ) + typename ChunkedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { matrix.resolveSliceSizes( rowLengths ); } @@ 
-1397,7 +1397,7 @@ class ChunkedEllpackDeviceDependentCode< Devices::Cuda > template< typename Real, typename Index > static void resolveSliceSizes( ChunkedEllpack< Real, Device, Index >& matrix, - const typename ChunkedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths ) + typename ChunkedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { } diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index 2de30b3f96f5cf830f4f8c476bbec8025b00ad66..351e8a8c7631feb1cf952cb8dd57762723a29bfc 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -40,6 +40,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef Dense< Real, Device, Index > ThisType; typedef Dense< Real, Devices::Host, Index > HostType; typedef Dense< Real, Devices::Cuda, Index > CudaType; @@ -66,7 +67,7 @@ public: /**** * This method is only for the compatibility with the sparse matrices. 
*/ - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); /**** * Returns maximal number of the nonzero matrix elements that can be stored diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h index 32958f08b2e2551076f5a5e50dbdbc3cff50ba13..e8e9ed9f1c8b54db08345b910939c33905c83bd7 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense_impl.h @@ -83,7 +83,7 @@ void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 template< typename Real, typename Device, typename Index > -void Dense< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { } diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Ellpack.h index 38333685bfbc59cd94dec2197463ca40557a57e5..1646db1c5c8b37bb635af0ee3501afd3fce6e431 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Ellpack.h @@ -36,6 +36,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef Ellpack< Real, Device, Index > ThisType; @@ -58,7 +59,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); void setConstantCompressedRowLengths( const IndexType& rowLengths ); diff 
--git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/EllpackSymmetric.h index 4d76a781756ac6abc1e96d4aef1a55bd88d34033..0720d9d5293f5190edfb3411807ae29b380d91d3 100644 --- a/src/TNL/Matrices/EllpackSymmetric.h +++ b/src/TNL/Matrices/EllpackSymmetric.h @@ -28,6 +28,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef EllpackSymmetric< Real, Device, Index > ThisType; @@ -44,7 +45,7 @@ class EllpackSymmetric : public Sparse< Real, Device, Index > void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); bool setConstantRowLengths( const IndexType& rowLengths ); diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/EllpackSymmetricGraph.h index 7b11b6b159adcf658f62f5af76941e7989636578..3a282c796be439209184023fb76aa692ff1e4294 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph.h @@ -28,6 +28,7 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType 
>::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef EllpackSymmetricGraph< Real, Device, Index > ThisType; @@ -44,7 +45,7 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); bool setConstantRowLengths( const IndexType& rowLengths ); @@ -152,10 +153,8 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > const int color ) const; #endif - __cuda_callable__ void computePermutationArray(); - __cuda_callable__ bool rearrangeMatrix( bool verbose ); bool save( File& file ) const; @@ -181,21 +180,20 @@ class EllpackSymmetricGraph : public Sparse< Real, Device, Index > __cuda_callable__ Index getRowsOfColor( IndexType color ) const; - __cuda_callable__ void copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ); __cuda_callable__ - Containers::Vector< Index, Device, Index > getPermutationArray(); + Containers::Vector< Index, Device, Index >& getPermutationArray(); __cuda_callable__ - Containers::Vector< Index, Device, Index > getInversePermutation(); + Containers::Vector< Index, Device, Index >& getInversePermutation(); __cuda_callable__ - Containers::Vector< Index, Device, Index > getColorPointers(); + Containers::Vector< Index, Device, Index >& getColorPointers(); protected: - bool allocateElements(); + void allocateElements(); IndexType rowLengths, alignedRows; diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h index f8b817d9a7cc048971ae9e173566c5d2998bd489..799d07281e62ae441c39162ee2fc5271997effac 100644 --- a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/EllpackSymmetricGraph_impl.h @@ -83,7 +83,7 @@ void 
EllpackSymmetricGraph< Real, Device, Index >::setDimensions( const IndexTyp template< typename Real, typename Device, typename Index > -void EllpackSymmetricGraph< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void EllpackSymmetricGraph< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT( this->getRows() > 0, ); TNL_ASSERT( this->getColumns() > 0, ); @@ -102,7 +102,7 @@ template< typename Real, __cuda_callable__ Index EllpackSymmetricGraph< Real, Device, Index >::getRowsOfColor( IndexType color ) const { - return this->colorPointers.getElement( color + 1 ) - this->colorPointers.getElement( color ); + return this->colorPointers[ color + 1 ] - this->colorPointers[ color ]; } /* @@ -174,7 +174,6 @@ void EllpackSymmetricGraph< Real, Device, Index >::computeColorsVector( Containe template< typename Real, typename Device, typename Index > -__cuda_callable__ void EllpackSymmetricGraph< Real, Device, Index >::computePermutationArray() { // init vector of colors and permutation array @@ -238,7 +237,6 @@ void EllpackSymmetricGraph< Real, Device, Index >::verifyPermutationArray() template< typename Real, typename Device, typename Index > -__cuda_callable__ bool EllpackSymmetricGraph< Real, Device, Index >::rearrangeMatrix( bool verbose ) { // first we need to know permutation @@ -296,7 +294,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getPermutationArray() +Containers::Vector< Index, Device, Index >& +EllpackSymmetricGraph< Real, Device, Index >::getPermutationArray() { return this->permutationArray; } @@ -305,7 +304,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getInversePermutation() +Containers::Vector< Index, Device, 
Index >& +EllpackSymmetricGraph< Real, Device, Index >::getInversePermutation() { return this->inversePermutationArray; } @@ -314,7 +314,8 @@ template< typename Real, typename Device, typename Index > __cuda_callable__ -Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, Index >::getColorPointers() +Containers::Vector< Index, Device, Index >& +EllpackSymmetricGraph< Real, Device, Index >::getColorPointers() { return this->colorPointers; } @@ -322,7 +323,6 @@ Containers::Vector< Index, Device, Index > EllpackSymmetricGraph< Real, Device, template< typename Real, typename Device, typename Index > -__cuda_callable__ void EllpackSymmetricGraph< Real, Device, Index >::copyFromHostToCuda( EllpackSymmetricGraph< Real, Devices::Host, Index >& matrix ) { // TODO: fix @@ -331,17 +331,17 @@ void EllpackSymmetricGraph< Real, Device, Index >::copyFromHostToCuda( EllpackSy this->rearranged = true; this->rowLengths = matrix.getRowLengthsInt(); this->alignedRows = matrix.getAlignedRows(); - Containers::Vector< Index, Devices::Host, Index > colorPointers = matrix.getColorPointers(); + Containers::Vector< Index, Devices::Host, Index >& colorPointers = matrix.getColorPointers(); this->colorPointers.setSize( colorPointers.getSize() ); for( IndexType i = 0; i < colorPointers.getSize(); i++ ) this->colorPointers.setElement( i, colorPointers[ i ] ); - Containers::Vector< Index,Devices::Host, Index > permutationArray = matrix.getPermutationArray(); + Containers::Vector< Index,Devices::Host, Index >& permutationArray = matrix.getPermutationArray(); this->permutationArray.setSize( permutationArray.getSize() ); for( IndexType i = 0; i < permutationArray.getSize(); i++ ) this->permutationArray.setElement( i, permutationArray[ i ] ); - Containers::Vector< Index, Devices::Host, Index > inversePermutation = matrix.getInversePermutation(); + Containers::Vector< Index, Devices::Host, Index >& inversePermutation = matrix.getInversePermutation(); 
this->inversePermutationArray.setSize( inversePermutation.getSize() ); for( IndexType i = 0; i < inversePermutation.getSize(); i++ ) this->inversePermutationArray.setElement( i, inversePermutation[ i ] ); @@ -363,7 +363,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::setConstantRowLengths( const TNL_ASSERT( rowLengths > 0, std::cerr << " rowLengths = " << rowLengths ); this->rowLengths = rowLengths; if( this->rows > 0 ) - return allocateElements(); + allocateElements(); return true; } @@ -445,37 +445,37 @@ bool EllpackSymmetricGraph< Real, Device, Index > :: addElementFast( const Index const RealType& thisElementMultiplicator ) { typedef EllpackSymmetricGraphDeviceDependentCode< DeviceType > DDCType; - IndexType i = DDCType::getRowBegin( *this, this->permutationArray.getElement( row ) ); - const IndexType rowEnd = DDCType::getRowEnd( *this, this->permutationArray.getElement( row ) ); + IndexType i = DDCType::getRowBegin( *this, this->permutationArray[ row ] ); + const IndexType rowEnd = DDCType::getRowEnd( *this, this->permutationArray[ row ] ); const IndexType step = DDCType::getElementStep( *this ); while( i < rowEnd && - this->columnIndexes.getElement( i ) < column && - this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) i += step; + this->columnIndexes[ i ] < column && + this->columnIndexes[ i ] != this->getPaddingIndex() ) i += step; if( i == rowEnd ) return false; - if( this->columnIndexes.getElement( i ) == column ) + if( this->columnIndexes[ i ] == column ) { - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value); + this->values[ i ] = thisElementMultiplicator * this->values[ i ] + value; return true; } else - if( this->columnIndexes.getElement( i ) == this->getPaddingIndex() ) // artificial zero + if( this->columnIndexes[ i ] == this->getPaddingIndex() ) // artificial zero { - this->columnIndexes.setElement( i, column); - this->values.setElement( i, value); + this->columnIndexes[ i ] = column; + 
this->values[ i ] = value; } else { Index j = rowEnd - step; while( j > i ) { - this->columnIndexes.setElement( j, this->columnIndexes.getElement( j - step ) ); - this->values.setElement( j, this->values.getElement( j - step ) ); + this->columnIndexes[ j ] = this->columnIndexes[ j - step ]; + this->values[ j ] = this->values[ j - step ]; j -= step; } - this->columnIndexes.setElement( i, column ); - this->values.setElement( i, value ); + this->columnIndexes[ i ] = column; + this->values[ i ] = value; } return true; } @@ -783,6 +783,7 @@ bool EllpackSymmetricGraph< Real, Device, Index >::help( bool verbose ) { if( !this->rearranged ) return this->rearrangeMatrix( verbose ); + return true; } template< typename Real, @@ -810,7 +811,7 @@ void EllpackSymmetricGraph< Real, Device, Index >::print( std::ostream& str ) co template< typename Real, typename Device, typename Index > -bool EllpackSymmetricGraph< Real, Device, Index >::allocateElements() +void EllpackSymmetricGraph< Real, Device, Index >::allocateElements() { Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths ); } diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/EllpackSymmetric_impl.h index c353cbb392191c5a787263e808a0f59110d5cad4..42202a883c0c887317aaf68904dfa0bddf27a646 100644 --- a/src/TNL/Matrices/EllpackSymmetric_impl.h +++ b/src/TNL/Matrices/EllpackSymmetric_impl.h @@ -66,7 +66,7 @@ void EllpackSymmetric< Real, Device, Index >::setDimensions( const IndexType row template< typename Real, typename Device, typename Index > -void EllpackSymmetric< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void EllpackSymmetric< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT( this->getRows() > 0, ); TNL_ASSERT( this->getColumns() > 0, ); @@ -85,7 +85,7 @@ bool EllpackSymmetric< Real, Device, Index >::setConstantRowLengths( const Index std::cerr << " 
rowLengths = " << rowLengths ); this->rowLengths = rowLengths; if( this->rows > 0 ) - return allocateElements(); + allocateElements(); return true; } diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h index 9801b6bcac54bdff89337428d3b83968ebf3759a..6186206439474d97d8edced12b4671b257b6f0ed 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Ellpack_impl.h @@ -84,7 +84,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index > -void Ellpack< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); @@ -933,7 +933,7 @@ class EllpackDeviceDependentCode< Devices::Cuda > //Devices::Cuda::freeFromDevice( kernel_inVector ); //Devices::Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; - cudaThreadSynchronize(); + cudaDeviceSynchronize(); #endif } diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index a30d8c2a420bc3d0cae2ad07977e72717cc5431f..b7c205998f77d442dbaf4efea7021f1f169b15cb 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -13,6 +13,7 @@ #include <TNL/Object.h> #include <TNL/Devices/Host.h> #include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> namespace TNL { namespace Matrices { @@ -27,6 +28,8 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef Containers::VectorView< const IndexType, DeviceType, IndexType > ConstCompressedRowLengthsVectorView; 
typedef Containers::Vector< RealType, DeviceType, IndexType > ValuesVector; Matrix(); @@ -34,13 +37,15 @@ public: virtual void setDimensions( const IndexType rows, const IndexType columns ); - virtual void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0; + virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0; virtual IndexType getRowLength( const IndexType row ) const = 0; // TODO: implementation is not parallel // TODO: it would be nice if padding zeros could be stripped - virtual void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; + void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; + + virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; template< typename Real2, typename Device2, typename Index2 > void setLike( const Matrix< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/MatrixOperations.h b/src/TNL/Matrices/MatrixOperations.h index 9bfa472c5d7fcb6a69828780b44d39f022b6b446..6ae4dd07e47cda53e23e6c9eea5666ed53b4dba3 100644 --- a/src/TNL/Matrices/MatrixOperations.h +++ b/src/TNL/Matrices/MatrixOperations.h @@ -340,8 +340,7 @@ public: // TODO: use static storage, e.g. from the CudaReductionBuffer, to avoid frequent reallocations Containers::Vector< RealType, Devices::Cuda, IndexType > xDevice; xDevice.setSize( n ); - if( ! 
Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< RealType, RealType, IndexType >( xDevice.getData(), x, n ) ) - throw 1; + Containers::Algorithms::ArrayOperations< Devices::Cuda, Devices::Host >::copyMemory< RealType, RealType, IndexType >( xDevice.getData(), x, n ); // desGridSize = blocksPerMultiprocessor * numberOfMultiprocessors const int desGridSize = 32 * Devices::CudaDeviceInfo::getCudaMultiprocessors( Devices::CudaDeviceInfo::getActiveDevice() ); diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h index 9728020053edca6d340198f666accc32a4aca46d..cb685917fd676e0d062f400ab0fd42836dbe1e05 100644 --- a/src/TNL/Matrices/Matrix_impl.h +++ b/src/TNL/Matrices/Matrix_impl.h @@ -43,6 +43,15 @@ template< typename Real, void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const { rowLengths.setSize( this->getRows() ); + getCompressedRowLengths( CompressedRowLengthsVectorView( rowLengths ) ); +} + +template< typename Real, + typename Device, + typename Index > +void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); for( IndexType row = 0; row < this->getRows(); row++ ) rowLengths.setElement( row, this->getRowLength( row ) ); } diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 9b8f18779ceb9afa15e3e27f3610a2b0fa23fde6..cfa798e7a254916455db5c896640495bdbc504c1 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -37,6 +37,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef Multidiagonal< Real, 
Device, Index > ThisType; typedef Multidiagonal< Real, Devices::Host, Index > HostType; typedef Multidiagonal< Real, Devices::Cuda, Index > CudaType; @@ -57,7 +58,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Multidiagonal_impl.h index 5f7228d698db1a47dbc62f2b540c08b1e3f9b86c..bd4c24691252ac425466906c549168248aca8244 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Multidiagonal_impl.h @@ -83,7 +83,7 @@ void Multidiagonal< Real, Device, Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index > -void Multidiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void Multidiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { /**** * TODO: implement some check here similar to the one in the tridiagonal matrix diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/SlicedEllpack.h index 815728d7a58d588a1791c1aa80b84bcd81da8f4b..6f68f2fa8aea4979b8f4685d2ee25d3039653ea7 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/SlicedEllpack.h @@ -41,8 +41,8 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::CompressedRowLengthsVector* rowLengths, - int gridIdx ); + typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, + int gridIdx ); #endif template< typename Real, @@ -65,6 +65,7 @@ 
public: typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef SlicedEllpack< Real, Device, Index, SliceSize > ThisType; @@ -88,7 +89,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; @@ -225,7 +226,7 @@ protected: // TODO: The friend declaration above does not work because of __global__ storage specifier. Therefore we declare the following method as public. Fix this, when possible. 
public: - __device__ void computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, + __device__ void computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, const IndexType sliceIdx ); #endif }; diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/SlicedEllpackSymmetric.h index 0ef1c26d8eeb3d55ea7bd2467d732e886f12c069..d9abb0de2ef664fd4032e8e5b0e00203093eb250 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetric.h @@ -30,7 +30,7 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::CompressedRowLengthsVector* rowLengths, + typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ); #endif @@ -46,6 +46,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef SlicedEllpackSymmetric< Real, Device, Index > ThisType; @@ -62,7 +63,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType 
getRowLength( const IndexType row ) const; @@ -196,7 +197,7 @@ class SlicedEllpackSymmetric : public Sparse< Real, Device, Index > // TODO: The friend declaration above does not work because of __global__ storage specifier. Therefore we declare the following method as public. Fix this, when possible. public: - __device__ void computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, + __device__ void computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, const IndexType sliceIdx ); #endif diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h index 3cab23d1db6613fc1a2b93c5561987cae9a0a15d..a2ab000957227a88a9655888cb417cea6498f56f 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h @@ -30,8 +30,8 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::RowLengthsVector* rowLengths, - int gridIdx ); + typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, + int gridIdx ); #endif template< typename Real, @@ -46,6 +46,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; 
typedef SlicedEllpackSymmetricGraph< Real, Device, Index > ThisType; @@ -62,7 +63,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; @@ -194,10 +195,8 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > void copyFromHostToCuda( SlicedEllpackSymmetricGraph< Real, Devices::Host, Index, SliceSize >& matrix ); - __cuda_callable__ bool rearrangeMatrix( bool verbose = false ); - __cuda_callable__ void computePermutationArray(); Containers::Vector< Index, Device, Index > getSlicePointers(); @@ -229,7 +228,7 @@ class SlicedEllpackSymmetricGraph : public Sparse< Real, Device, Index > // TODO: The friend declaration above does not work because of __global__ storage specifier. Therefore we declare the following method as public. Fix this, when possible. 
public: - __device__ void computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, + __device__ void computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, const IndexType sliceIdx ); #endif diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h index 720796425bf1a296f005507302afb81d6d1544ab..9f09a21c506bd08110bc8360e17d3bf1f3a4097c 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h +++ b/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h @@ -65,7 +65,7 @@ template< typename Real, typename Device, typename Index, int SliceSize > -void SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT( this->getRows() > 0, ); TNL_ASSERT( this->getColumns() > 0, ); @@ -701,7 +701,6 @@ Index SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::getRealRowL Index rowBegin = slicePointer + rowLength * ( row - sliceIdx * SliceSize ); Index rowEnd = rowBegin + rowLength; - Index step = 1; Index length = 0; for( Index i = rowBegin; i < rowEnd; i++ ) if( this->columnIndexes.getElement( i ) != this->getPaddingIndex() ) @@ -764,7 +763,6 @@ bool SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::rearrangeMat for( IndexType row = slice * SliceSize; row < (slice + 1) * SliceSize && row < this->getRows(); row++ ) { IndexType rowBegin = slicePointerOrig + rowLengthOrig * ( row - slice * SliceSize ); - IndexType rowEnd = rowBegin + rowLengthOrig; IndexType elementPointer = rowBegin; IndexType sliceNew = this->permutationArray.getElement( row ) / SliceSize; @@ -971,8 +969,8 @@ template< typename Real, typename Device, typename Index, int SliceSize > -__device__ void SlicedEllpackSymmetricGraph< Real, Device, 
Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, - const IndexType sliceIdx ) +__device__ void SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, + const IndexType sliceIdx ) { Index rowIdx = sliceIdx * SliceSize; Index rowInSliceIdx( 0 ); @@ -1040,8 +1038,8 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpackSymmetricGraph< Real, Device, Index >::RowLengthsVector& rowLengths, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, + typename SlicedEllpackSymmetricGraph< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths, Containers::Vector< Index, Device, Index >& sliceRowLengths, Containers::Vector< Index, Device, Index >& slicePointers ) { @@ -1120,11 +1118,11 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::RowLengthsVector* rowLengths, - int gridIdx ) + typename SlicedEllpackSymmetricGraph< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVector rowLengths, + int gridIdx ) { const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; - matrix->computeMaximalRowLengthInSlicesCuda( *rowLengths, sliceIdx ); + matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -1233,8 +1231,8 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > template< typename 
Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpackSymmetricGraph< Real, Device, Index >::RowLengthsVector& rowLengths, + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize >& matrix, + typename SlicedEllpackSymmetricGraph< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths, Containers::Vector< Index, Device, Index >& sliceRowLengths, Containers::Vector< Index, Device, Index >& slicePointers ) { @@ -1242,7 +1240,6 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > typedef SlicedEllpackSymmetricGraph< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); @@ -1253,11 +1250,10 @@ class SlicedEllpackSymmetricGraphDeviceDependentCode< Devices::Cuda > cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); SlicedEllpackSymmetricGraph_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, - kernel_rowLengths, + rowLengths, gridIdx ); } Devices::Cuda::freeFromDevice( kernel_matrix ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h index 14af7673483b7a153ca48f604850517e2ffe8ecb..402ac5a6c8128ab67bbf2393528fb3a2b58b9077 100644 --- a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h +++ 
b/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h @@ -64,7 +64,7 @@ template< typename Real, typename Device, typename Index, int SliceSize > -void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT( this->getRows() > 0, ); TNL_ASSERT( this->getColumns() > 0, ); @@ -75,7 +75,7 @@ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::setCompressedRowL // TODO: Uncomment the next line and fix the compilation //DeviceDependentCode::computeMaximalRowLengthInSlices( *this, rowLengths ); - TNL_ASSERT( false, "code fix required" ); + throw std::runtime_error("code fix required"); this->maxRowLength = rowLengths.max(); @@ -693,8 +693,8 @@ template< typename Real, typename Device, typename Index, int SliceSize > -__device__ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, - const IndexType sliceIdx ) +__device__ void SlicedEllpackSymmetric< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, + const IndexType sliceIdx ) { Index rowIdx = sliceIdx * SliceSize; Index rowInSliceIdx( 0 ); @@ -743,6 +743,7 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > + __cuda_callable__ static void initRowTraverseFast( const SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, const Index row, Index& rowBegin, @@ -762,8 +763,8 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Host > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpackSymmetric< Real, 
Device, Index >::RowLengthsVector& rowLengths ) + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, + typename SlicedEllpackSymmetric< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { Index row( 0 ), slice( 0 ), sliceRowLength( 0 ); while( row < matrix.getRows() ) @@ -806,11 +807,11 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::RowLengthsVector* rowLengths, + typename SlicedEllpackSymmetric< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, int gridIdx ) { const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; - matrix->computeMaximalRowLengthInSlicesCuda( *rowLengths, sliceIdx ); + matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -862,14 +863,13 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpackSymmetric< Real, Device, Index >::RowLengthsVector& rowLengths ) + static void computeMaximalRowLengthInSlices( SlicedEllpackSymmetric< Real, Device, Index, SliceSize >& matrix, + typename SlicedEllpackSymmetric< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { #ifdef HAVE_CUDA typedef SlicedEllpackSymmetric< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::RowLengthsVector CompressedRowLengthsVector; Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); 
const Index numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); @@ -880,11 +880,10 @@ class SlicedEllpackSymmetricDeviceDependentCode< Devices::Cuda > cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); SlicedEllpackSymmetric_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, - kernel_rowLengths, + rowLengths, gridIdx ); } Devices::Cuda::freeFromDevice( kernel_matrix ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif } diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h index 2ff01b49c51943c7411626257a57f273c6880b05..95a601a00a01ead3f11d0cd0ca0f96a0373b9606 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/SlicedEllpack_impl.h @@ -82,7 +82,7 @@ template< typename Real, typename Device, typename Index, int SliceSize > -void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { TNL_ASSERT_GT( this->getRows(), 0, "cannot set row lengths of an empty matrix" ); TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" ); @@ -772,8 +772,8 @@ template< typename Real, typename Device, typename Index, int SliceSize > -__device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( const CompressedRowLengthsVector& rowLengths, - const IndexType sliceIdx ) +__device__ void SlicedEllpack< Real, Device, Index, SliceSize >::computeMaximalRowLengthInSlicesCuda( ConstCompressedRowLengthsVectorView rowLengths, + const IndexType sliceIdx ) { Index rowIdx = sliceIdx * SliceSize; Index 
rowInSliceIdx( 0 ); @@ -843,7 +843,7 @@ class SlicedEllpackDeviceDependentCode< Devices::Host > typename Index, int SliceSize > static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths ) + typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { Index row( 0 ), slice( 0 ), sliceRowLength( 0 ); while( row < matrix.getRows() ) @@ -888,11 +888,11 @@ template< typename Real, typename Index, int SliceSize > __global__ void SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel( SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >* matrix, - const typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::CompressedRowLengthsVector* rowLengths, - int gridIdx ) + typename SlicedEllpack< Real, Devices::Cuda, Index, SliceSize >::ConstCompressedRowLengthsVectorView rowLengths, + int gridIdx ) { const Index sliceIdx = gridIdx * Devices::Cuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x; - matrix->computeMaximalRowLengthInSlicesCuda( *rowLengths, sliceIdx ); + matrix->computeMaximalRowLengthInSlicesCuda( rowLengths, sliceIdx ); } #endif @@ -984,13 +984,12 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > typename Index, int SliceSize > static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths ) + typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { #ifdef HAVE_CUDA typedef SlicedEllpack< Real, Device, Index, SliceSize > Matrix; typedef typename Matrix::CompressedRowLengthsVector CompressedRowLengthsVector; Matrix* kernel_matrix = Devices::Cuda::passToDevice( matrix ); - CompressedRowLengthsVector* kernel_rowLengths = Devices::Cuda::passToDevice( rowLengths ); const Index 
numberOfSlices = roundUpDivision( matrix.getRows(), SliceSize ); dim3 cudaBlockSize( 256 ), cudaGridSize( Devices::Cuda::getMaxGridSize() ); const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x ); @@ -1001,11 +1000,10 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > cudaGridSize.x = cudaBlocks % Devices::Cuda::getMaxGridSize(); SlicedEllpack_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize ><<< cudaGridSize, cudaBlockSize >>> ( kernel_matrix, - kernel_rowLengths, + rowLengths, gridIdx ); } Devices::Cuda::freeFromDevice( kernel_matrix ); - Devices::Cuda::freeFromDevice( kernel_rowLengths ); TNL_CHECK_CUDA_DEVICE; #endif return true; @@ -1053,7 +1051,7 @@ class SlicedEllpackDeviceDependentCode< Devices::Cuda > //Devices::Cuda::freeFromDevice( kernel_inVector ); //Devices::Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; - cudaThreadSynchronize(); + cudaDeviceSynchronize(); #endif } @@ -1094,8 +1092,8 @@ class SlicedEllpackDeviceDependentCode< Devices::MIC > template< typename Real, typename Index, int SliceSize > - static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, - const typename SlicedEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths ) + static bool computeMaximalRowLengthInSlices( SlicedEllpack< Real, Device, Index, SliceSize >& matrix, + typename SlicedEllpack< Real, Device, Index >::ConstCompressedRowLengthsVectorView rowLengths ) { throw std::runtime_error("Not Implemented yet SlicedEllpackDeviceDependentCode< Devices::MIC >::computeMaximalRowLengthInSlices"); } diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Sparse.h index 2a694826b94e9d757079f72942f3f810ce136885..110dd7a40a57372c9fc65ea94c6bac1adf3c0f54 100644 --- a/src/TNL/Matrices/Sparse.h +++ b/src/TNL/Matrices/Sparse.h @@ -26,7 +26,6 @@ class Sparse : public Matrix< Real, Device, Index > typedef Real RealType; typedef Device DeviceType; typedef Index 
IndexType; - typedef typename Matrix< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; @@ -34,8 +33,6 @@ class Sparse : public Matrix< Real, Device, Index > Sparse(); - virtual void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) = 0; - template< typename Real2, typename Device2, typename Index2 > void setLike( const Sparse< Real2, Device2, Index2 >& matrix ); diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 472cadffcd4194270d5218e3c0ea1415b2c7ae5c..153c3bdbc4b6235022a44bb297d09c7fe5cbc458 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -39,6 +39,7 @@ public: typedef Device DeviceType; typedef Index IndexType; typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef Tridiagonal< Real, Device, Index > ThisType; typedef Tridiagonal< Real, Devices::Host, Index > HostType; typedef Tridiagonal< Real, Devices::Cuda, Index > CudaType; @@ -58,7 +59,7 @@ public: void setDimensions( const IndexType rows, const IndexType columns ); - void setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ); + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h index 66fe9d7e80a80f93a1d16f1e741008f4de6a0787..faee4815491672b467fec15774f9a7cacd5efa6c 100644 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ b/src/TNL/Matrices/Tridiagonal_impl.h @@ -75,7 +75,7 @@ void Tridiagonal< Real, Device, 
Index >::setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index > -void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( const CompressedRowLengthsVector& rowLengths ) +void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { if( rowLengths[ 0 ] > 2 ) throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp index 5a0181a05b5599569605b9f762887a70db10f68b..2a6d35bf3aaac41fb4be603672f837171a2fb010 100644 --- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid.hpp @@ -11,9 +11,7 @@ #pragma once #include <cstdlib> -#include <TNL/StaticVectorFor.h> #include <TNL/Communicators/MpiCommunicator.h> -#include <TNL/Exceptions/UnsupportedDimension.h> #include <iostream> diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h index ed43e38b265d353936bfb09f907096695cea5af4..d2f7a18edb5a59beb113a8b6710a28001c9a9c5b 100644 --- a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h +++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO_MeshFunction.h @@ -57,7 +57,8 @@ class DistributedGridIO<Functions::MeshFunction<MeshType>,LocalCopy,Device> newMesh->setOrigin(origin+TNL::Containers::Scale(spaceSteps,localBegin)); File meshFile; - meshFile.open( fileName+String("-mesh-")+distrGrid->printProcessCoords()+String(".tnl"),IOMode::write); + bool ok=meshFile.open( fileName+String("-mesh-")+distrGrid->printProcessCoords()+String(".tnl"),IOMode::write); + TNL_ASSERT_TRUE(ok,"Not able to open mesh file to write"); newMesh->save( meshFile ); meshFile.close(); @@ -72,7 +73,8 @@ class DistributedGridIO<Functions::MeshFunction<MeshType>,LocalCopy,Device> 
CopyEntitiesHelper<MeshFunctionType>::Copy(meshFunction,newMeshFunction,localBegin,zeroCoord,localSize); File file; - file.open( fileName+String("-")+distrGrid->printProcessCoords()+String(".tnl"), IOMode::write ); + ok=file.open( fileName+String("-")+distrGrid->printProcessCoords()+String(".tnl"), IOMode::write ); + TNL_ASSERT_TRUE(ok,"Not able to open file to write"); bool ret=newMeshFunction.save(file); file.close(); @@ -110,7 +112,8 @@ class DistributedGridIO<Functions::MeshFunction<MeshType>,LocalCopy,Device> zeroCoord.setValue(0); File file; - file.open( fileName+String("-")+distrGrid->printProcessCoords()+String(".tnl"), IOMode::read ); + bool ok=file.open( fileName+String("-")+distrGrid->printProcessCoords()+String(".tnl"), IOMode::read ); + TNL_ASSERT_TRUE(ok,"Not able to open file to read"); bool result=newMeshFunction.boundLoad(file); file.close(); CopyEntitiesHelper<MeshFunctionType>::Copy(newMeshFunction,meshFunction,zeroCoord,localBegin,localSize); @@ -151,12 +154,13 @@ class DistributedGridIO_MPIIOBase MPI_Comm group=*((MPI_Comm*)(distrGrid->getCommunicationGroup())); MPI_File file; - MPI_File_open( group, + int ok=MPI_File_open( group, const_cast< char* >( fileName.getString() ), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &file); - + TNL_ASSERT_EQ(ok,0,"Open file falied"); + int written=save(file,meshFunction, data,0); MPI_File_close(&file); @@ -322,13 +326,14 @@ class DistributedGridIO_MPIIOBase MPI_Comm group=*((MPI_Comm*)(distrGrid->getCommunicationGroup())); MPI_File file; - MPI_File_open( group, + int ok=MPI_File_open( group, const_cast< char* >( fileName.getString() ), MPI_MODE_RDONLY, MPI_INFO_NULL, &file ); + TNL_ASSERT_EQ(ok,0,"Open file falied"); - bool ret= load(file, meshFunction, data,0)>0; + bool ret= load(file, meshFunction, data,0)>0; MPI_File_close(&file); diff --git a/src/TNL/Pointers/DevicePointer.h b/src/TNL/Pointers/DevicePointer.h index 194e68967ccc3368983ce32aeca22f3af1f4e2be..26ff692e4d0e36d9d6783fc91d4fb620ad832d5a 
100644 --- a/src/TNL/Pointers/DevicePointer.h +++ b/src/TNL/Pointers/DevicePointer.h @@ -393,9 +393,7 @@ class DevicePointer< Object, Devices::Cuda > : public SmartPointer TNL_ASSERT( this->pointer, ); TNL_ASSERT( this->cuda_pointer, ); cudaMemcpy( (void*) this->cuda_pointer, (void*) this->pointer, sizeof( ObjectType ), cudaMemcpyHostToDevice ); - if( ! TNL_CHECK_CUDA_DEVICE ) { - return false; - } + TNL_CHECK_CUDA_DEVICE; this->set_last_sync_state(); return true; } diff --git a/src/TNL/Pointers/SharedPointerCuda.h b/src/TNL/Pointers/SharedPointerCuda.h index 810d85e99125bea191cd112e88771b8ef2488322..42e46b257f9eb309f458f28ff3e46e591b03091b 100644 --- a/src/TNL/Pointers/SharedPointerCuda.h +++ b/src/TNL/Pointers/SharedPointerCuda.h @@ -544,9 +544,7 @@ class SharedPointer< Object, Devices::Cuda > : public SmartPointer #endif TNL_ASSERT( this->cuda_pointer, ); cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); - if( ! TNL_CHECK_CUDA_DEVICE ) { - return false; - } + TNL_CHECK_CUDA_DEVICE; this->set_last_sync_state(); return true; } diff --git a/src/TNL/Pointers/SmartPointersRegister.cpp b/src/TNL/Pointers/SmartPointersRegister.cpp index cd57dfe3439b0846f65f0bf8bfaf573cfcbd6e91..01641661c1ae008e6517232fc0bb56572f09ff5a 100644 --- a/src/TNL/Pointers/SmartPointersRegister.cpp +++ b/src/TNL/Pointers/SmartPointersRegister.cpp @@ -44,7 +44,8 @@ bool SmartPointersRegister::synchronizeDevice( int deviceId ) const auto & set = pointersOnDevices.at( deviceId ); for( auto&& it : set ) ( *it ).synchronize(); - return TNL_CHECK_CUDA_DEVICE; + TNL_CHECK_CUDA_DEVICE; + return true; } catch( const std::out_of_range& ) { return false; diff --git a/src/TNL/Pointers/UniquePointer.h b/src/TNL/Pointers/UniquePointer.h index 93a667c3553e65fc335c9a87e244d6e37dac536c..279f4535629ea144234040ef55570133a4dbeba8 100644 --- a/src/TNL/Pointers/UniquePointer.h +++ b/src/TNL/Pointers/UniquePointer.h @@ -238,8 +238,7 @@ class UniquePointer< 
Object, Devices::Cuda > : public SmartPointer if( this->modified() ) { cudaMemcpy( (void*) this->cuda_pointer, (void*) &this->pd->data, sizeof( Object ), cudaMemcpyHostToDevice ); - if( ! TNL_CHECK_CUDA_DEVICE ) - return false; + TNL_CHECK_CUDA_DEVICE; this->set_last_sync_state(); return true; } diff --git a/src/TNL/Solvers/Linear/CWYGMRES_impl.h b/src/TNL/Solvers/Linear/CWYGMRES_impl.h index 1f7d06c7221e1bb7588e817bf7940d7e099fbbe3..4989f50207ef3a0c2a98545f7d66fc7e6a824db7 100644 --- a/src/TNL/Solvers/Linear/CWYGMRES_impl.h +++ b/src/TNL/Solvers/Linear/CWYGMRES_impl.h @@ -398,18 +398,14 @@ hauseholder_generate( DeviceVector& Y, // aux = Y_{i-1}^T * y_i RealType aux[ i ]; Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct; - if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce + Containers::Algorithms::Multireduction< DeviceType >::reduce ( scalarProduct, i, size, Y.getData(), ldSize, y_i.getData(), - aux ) ) - { - std::cerr << "multireduction failed" << std::endl; - throw 1; - } + aux ); // [T_i]_{0..i-1} = - T_{i-1} * t_i * aux for( int k = 0; k < i; k++ ) { @@ -442,12 +438,8 @@ hauseholder_apply_trunc( HostVector& out, // here we duplicate the upper (m+1)x(m+1) submatrix of Y on host for fast access RealType* host_yi = &YL[ i * (restarting_max + 1) ]; RealType host_z[ i + 1 ]; - if( ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ) || - ! Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ) ) - { - std::cerr << "Failed to copy part of device vectors y_i or z to host buffer." 
<< std::endl; - throw 1; - } + Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_yi, y_i.getData(), restarting_max + 1 ); + Containers::Algorithms::ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< RealType, RealType, IndexType >( host_z, z.getData(), i + 1 ); for( int k = 0; k <= i; k++ ) out[ k ] = host_z[ k ] - host_yi[ k ] * aux; } @@ -501,18 +493,14 @@ hauseholder_cwy_transposed( DeviceVector& z, // aux = Y_i^T * w RealType aux[ i + 1 ]; Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct; - if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce + Containers::Algorithms::Multireduction< DeviceType >::reduce ( scalarProduct, i + 1, size, Y.getData(), ldSize, w.getData(), - aux ) ) - { - std::cerr << "multireduction failed" << std::endl; - throw 1; - } + aux ); // aux = T_i^T * aux // Note that T_i^T is lower triangular, so we can overwrite the aux vector with the result in place diff --git a/src/TNL/Solvers/ODE/Euler_impl.h b/src/TNL/Solvers/ODE/Euler_impl.h index debfeb7c204b77c97d694d456ad048ff700ce303..0b9eed1f8e9e7a356ae78d21469cde49bb88c6b1 100644 --- a/src/TNL/Solvers/ODE/Euler_impl.h +++ b/src/TNL/Solvers/ODE/Euler_impl.h @@ -204,7 +204,7 @@ void Euler< Problem > :: computeNewTimeLevel( DofVectorPointer& u, &_u[ gridOffset ], this->cudaBlockResidue.getData() ); localResidue += this->cudaBlockResidue.sum(); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); TNL_CHECK_CUDA_DEVICE; } #endif diff --git a/src/TNL/Solvers/ODE/Merson_impl.h b/src/TNL/Solvers/ODE/Merson_impl.h index 53ccc9fd2d2f34aca2d53dbce957ee707756ed96..3323f4b742373738602d8a671fb2245cb10707d4 100644 --- a/src/TNL/Solvers/ODE/Merson_impl.h +++ b/src/TNL/Solvers/ODE/Merson_impl.h @@ -305,7 +305,7 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u, const IndexType threadsPerGrid = Devices::Cuda::getMaxGridSize() * cudaBlockSize.x; 
this->problem->getExplicitUpdate( time, tau, u, k1 ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -313,10 +313,10 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u, const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); computeK2Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_kAux[ gridOffset ] ); } - cudaThreadSynchronize(); + cudaDeviceSynchronize(); this->problem->applyBoundaryConditions( time + tau_3, kAux ); this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k2 ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -324,10 +324,10 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u, const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); computeK3Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k2[ gridOffset ], &_kAux[ gridOffset ] ); } - cudaThreadSynchronize(); + cudaDeviceSynchronize(); this->problem->applyBoundaryConditions( time + tau_3, kAux ); this->problem->getExplicitUpdate( time + tau_3, tau, kAux, k3 ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -335,10 +335,10 @@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u, const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); computeK4Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k3[ gridOffset ], &_kAux[ gridOffset ] ); } - cudaThreadSynchronize(); + cudaDeviceSynchronize(); this->problem->applyBoundaryConditions( time + 0.5 * tau, kAux ); this->problem->getExplicitUpdate( time + 0.5 * tau, tau, kAux, k4 ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx ++ ) { @@ -346,10 +346,10 
@@ void Merson< Problem >::computeKFunctions( DofVectorPointer& u, const IndexType currentSize = min( size - gridOffset, threadsPerGrid ); computeK5Arg<<< cudaBlocks, cudaBlockSize >>>( currentSize, tau, &_u[ gridOffset ], &_k1[ gridOffset ], &_k3[ gridOffset ], &_k4[ gridOffset ], &_kAux[ gridOffset ] ); } - cudaThreadSynchronize(); + cudaDeviceSynchronize(); this->problem->applyBoundaryConditions( time + tau, kAux ); this->problem->getExplicitUpdate( time + tau, tau, kAux, k5 ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); #endif } } @@ -409,7 +409,7 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t &_k4[ gridOffset ], &_k5[ gridOffset ], &_kAux[ gridOffset ] ); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); eps = std::max( eps, kAux->max() ); } #endif @@ -468,7 +468,7 @@ void Merson< Problem >::computeNewTimeLevel( const RealType time, &_u[ gridOffset ], this->cudaBlockResidue.getData() ); localResidue += this->cudaBlockResidue.sum(); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); } this->problem->applyBoundaryConditions( time, u ); diff --git a/src/TNL/StaticVectorFor.h b/src/TNL/StaticVectorFor.h index 924a9c709601178e8850fb14102911d71eca5848..59af0fcb8256619d41014be4fa21023fee4679e2 100644 --- a/src/TNL/StaticVectorFor.h +++ b/src/TNL/StaticVectorFor.h @@ -16,34 +16,36 @@ namespace TNL { struct StaticVectorFor { - template < typename Index, + template< typename Index, typename Function, typename... FunctionArgs, - int dim> - static void exec( Containers::StaticVector<dim,Index> starts, Containers::StaticVector<dim,Index> ends, Function f, FunctionArgs... args ) - { - Containers::StaticVector<dim,Index> index; - if(dim==1) - { - for(index[0]=starts[0]; index[0]< ends[0];index[0]++ ) - f( index, args... ); - } - - if(dim==2) - { - for(index[1]=starts[1]; index[1]< ends[1];index[1]++ ) - for(index[0]=starts[0]; index[0]< ends[0];index[0]++ ) - f( index, args... 
); - } - - if(dim==3) - { - for(index[2]=starts[2]; index[2]< ends[2];index[2]++ ) - for(index[1]=starts[1]; index[1]< ends[1];index[1]++ ) - for(index[0]=starts[0]; index[0]< ends[0];index[0]++ ) - f( index, args... ); - } - } + int dim > + static void exec( const Containers::StaticVector< dim, Index >& begin, + const Containers::StaticVector< dim, Index >& end, + Function f, + FunctionArgs... args ) + { + static_assert( 1 <= dim && dim <= 3, "unsupported dimension" ); + Containers::StaticVector< dim, Index > index; + + if( dim == 1 ) { + for( index[0] = begin[0]; index[0] < end[0]; index[0]++ ) + f( index, args... ); + } + + if( dim == 2 ) { + for( index[1] = begin[1]; index[1] < end[1]; index[1]++ ) + for( index[0] = begin[0]; index[0] < end[0]; index[0]++ ) + f( index, args... ); + } + + if( dim == 3 ) { + for( index[2] = begin[2]; index[2] < end[2]; index[2]++ ) + for( index[1] = begin[1]; index[1] < end[1]; index[1]++ ) + for( index[0] = begin[0]; index[0] < end[0]; index[0]++ ) + f( index, args... 
); + } + } }; } // namespace TNL diff --git a/src/TNL/legacy/CMakeLists.txt b/src/TNL/legacy/CMakeLists.txt deleted file mode 100644 index e9f102901c3ae0cc5bef11560e2bd74b3381f947..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -#ADD_SUBDIRECTORY( core ) -#ADD_SUBDIRECTORY( diff ) -#ADD_SUBDIRECTORY( mesh ) -#ADD_SUBDIRECTORY( solvers ) - -SET( headers ) - -set( tnl_legacy_SOURCES - ${tnl_legacy_mesh_SOURCES} - ${tnl_legacy_solvers_SOURCES} - PARENT_SCOPE ) - -INSTALL( FILES ${headers} DESTINATION ${TNL_TARGET_INCLUDE_DIRECTORY}/legacy ) \ No newline at end of file diff --git a/src/TNL/legacy/benchmarks/ReorderCSR.cpp b/src/TNL/legacy/benchmarks/ReorderCSR.cpp deleted file mode 100644 index 8418e50f8db66cab4533fd4813943c6f667815bd..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/ReorderCSR.cpp +++ /dev/null @@ -1,609 +0,0 @@ -// $Id: ReorderCSR.c,v 1.1 2010/11/04 15:35:14 asuzuki Exp asuzuki $ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <getopt.h> - -#define EPS2 1.0e-20 - -#define AMD_ORDERING -#ifdef AMD_ORDERING -#include "amd.h" -#endif // #ifdef AMD_ORDERING - -typedef struct { - int col_ind; - double val; -} csr_data; - -#define AMD 4 -#define DESCEND 1 - -void init_CSR(double *val, int *col_ind, int *row_ptr, int nnz, int n); -void print_CSR(char *st, - double *val, int *col_ind, int *row_ptr, int n); -void make_order_index(int *ordering, int *row_ptr, int n, int descend); -int comp_int(const void *_a, const void *_b); -int comp_col_ind(const void *_a, const void *_b); - -void draw_csr(char *buf, int *row_ptr, int *csr_ind, int num_row); -int count_padding(int *nonzeros, int *reordering, int num_row, int block_size); - -void makeRgCSR(double *val_new, int *col_ind_new, int *nonzeros, int *grp_ptr, - double *val, int *col_ind, int *row_ptr, int group_size, int n); - -int countalignedRgCSR(int *row_ptr, int group_size, int n); - -void 
SpMVCSR(double *y, double *x, double *val, - int *col_ind, int *row_ptr, int num_row); - -void SpMVRgCSR(double *y, double *x, - double *val, int *col_ind, int *nonzeros, int *grp_ptr, - int block_size, int n); - -void reorder_csr_matrix(double *val_new, int *col_ind_new, int *row_ptr_new, - double *val, int *col_ind, int *row_ptr, - int *ordering, int *reordering, csr_data *work, - int num_row); - -int main(int argc, char **argv) -{ - double *val, *val_coo, *val_new; - int *col_coo, *row_coo, *col_ind, *row_ptr, *nonzeros; - int *col_ind_new, *row_ptr_new; - int *ordering, *reordering; - double *val_rgcsr, *val_rgcsr_new; - double *x, *xx, *y, *y_rgcsr, *y_rgcsr_new; - - int *col_ind_rgcsr, *nonzeros_rgcsr, *grp_ptr_rgcsr; - int *col_ind_rgcsr_new, *nonzeros_rgcsr_new, *grp_ptr_rgcsr_new; - int num_row, num_col, num_nz, num_nz0; - int jtmp; - FILE *fp; - char in_file[256], out_file[256], buf[256]; - int block_size = 32; - int max_nonzeros, min_nonzeros, padding; - double mean_nonzeros; - int verbose = 0, graph_output = 0; - int method_ordering = DESCEND; - int flag_symmetric = 0; - int c; - // clear file name - in_file[0] = out_file[0] = 0; - - while ((c = getopt(argc, argv, - "ADGvg:i:o:")) != EOF) { - switch(c) { - case 'G': - graph_output = 1; - break; - case 'i': - strcpy(in_file, optarg); - break; - case 'o': - strcpy(out_file, optarg); - break; - case 'A': - method_ordering = AMD; - break; - case 'D': - method_ordering = DESCEND; - break; - case 'g': - block_size = atoi(optarg); - break; - case 'v': - verbose = 1; - break; - case 'h': - fprintf(stderr, - "ReorderCSR -h -A -D -v -i [infile] -o [outfile] -g [group_size]\n"); - break; - } - } - - if (in_file[0] == 0 || out_file[0] == 0) { - fprintf(stderr, "matrix file name is incorrect\n"); - } - if((fp = fopen(in_file, "r")) == NULL) { - exit(-1); - } - - while (1) { - fgets(buf, 256, fp); - if (strstr(buf, "%%MatrixMarket") != NULL && - strstr(buf, "symmetric") != NULL) { - flag_symmetric = 1; - if(verbose) 
{ - printf("symmetric\n"); - } - } - if (buf[0] != '%') { - break; - } - } - sscanf(buf, "%d %d %d", &num_row, &num_col, &num_nz); - - col_coo = (int *)malloc(sizeof(int) * num_nz); - row_coo = (int *)malloc(sizeof(int) * num_nz); - val_coo = (double *)malloc(sizeof(double) * num_nz); - - for (int j = 0; j < num_nz; j++) { - fgets(buf, 256, fp); - sscanf(buf, "%d %d %lf", &row_coo[j], &col_coo[j], &val_coo[j]); - // for C index array style starting at 0 - row_coo[j]--; - col_coo[j]--; - } - fclose(fp); - - // count diagonal parts - num_nz0 = num_nz; - if (flag_symmetric) { - num_nz = num_nz * 2; - int ktmp = 0; - for (int i = 0; i < num_nz0; i++) { - if (row_coo[i] == col_coo[i]) { - ktmp++; - } - } - num_nz -= ktmp; - } - - col_ind = (int *)malloc(sizeof(int) * num_nz); - col_ind_new = (int *)malloc(sizeof(int) * num_nz); - val = (double *)malloc(sizeof(double) * num_nz); - val_new = (double *)malloc(sizeof(double) * num_nz); - row_ptr = (int *)malloc(sizeof(int) * (num_row + 1)); - row_ptr_new = (int *)malloc(sizeof(int) * (num_row + 1)); - nonzeros = (int *)malloc(sizeof(int) * num_row); - ordering = (int *)malloc(sizeof(int) * num_row); - reordering = (int *)malloc(sizeof(int) * num_row); - - if(verbose) { - printf("%d %d %d\n", num_row, num_col, num_nz); - } - - for (int i = 0; i < num_row; i++) { - nonzeros[i] = 0; - } - for (int j = 0; j < num_nz0; j++) { - nonzeros[row_coo[j]]++; - if (flag_symmetric) { - if (row_coo[j] != col_coo[j]) { - nonzeros[col_coo[j]]++; - } - } - } - - row_ptr[0] = 0; - for (int i = 0; i < num_row; i++) { - row_ptr[i + 1] = row_ptr[i] + nonzeros[i]; - } - - for (int i = 0; i < num_row; i++) { - reordering[i] = i; - } - - padding = count_padding(nonzeros, reordering, num_row, block_size); - if(verbose) { - printf("original: %d\n", padding); - } - // make CSR format - for (int i = 0; i < num_row; i++) { - nonzeros[i] = 0; - } - for (int j = 0; j < num_nz0; j++) { - int ii = row_coo[j]; - int jj = col_coo[j]; - int ktmp = row_ptr[ii] 
+ nonzeros[ii]; - col_ind[ktmp] = jj; - val[ktmp] = val_coo[j]; - nonzeros[ii]++; - if (flag_symmetric) { - if (ii != jj) { - ktmp = row_ptr[jj] + nonzeros[jj]; - col_ind[ktmp] = ii; - val[ktmp] = val_coo[j]; - nonzeros[jj]++; - } - } - } - - max_nonzeros = 0; - for (int i = 0; i < num_row; i++) { - if (max_nonzeros < nonzeros[i]) { - max_nonzeros = nonzeros[i]; - } - } - csr_data *work; - work = (csr_data *)malloc(max_nonzeros * sizeof(csr_data)); - - // sort column index in each row - for (int i = 0; i < num_row; i++) { - int ktmp = 0; - for (int k = row_ptr[i]; k < row_ptr[i + 1]; k++) { - work[ktmp].col_ind = col_ind[k]; - work[ktmp].val = val[k]; - ktmp++; - } - qsort(work, nonzeros[i], sizeof(csr_data), comp_col_ind); - ktmp = 0; - for (int k = row_ptr[i]; k < row_ptr[i + 1]; k++) { - col_ind[k] = work[ktmp].col_ind; - val[k] = work[ktmp].val; - ktmp++; - } - } - strcpy(buf, in_file); - strcat(buf, ".ps"); - - if (graph_output) { - draw_csr(buf, row_ptr, col_ind, num_row); - } - - strcpy(buf, out_file); - - switch(method_ordering) { -#ifdef AMD_ORDERING - case AMD: - { - double Control [AMD_CONTROL], Info [AMD_INFO]; - - amd_defaults(Control) ; - amd_control(Control) ; - (void)amd_order(num_row, row_ptr, col_ind, reordering, Control, Info); - // make inverse mapping : old -> new - if (verbose) { - amd_info(Info); - } - for (int i = 0; i < num_row; i++) { - ordering[reordering[i]] = i; - } - strcat(buf, ".amd.ps"); - } - break; -#endif - case DESCEND: - make_order_index(ordering, row_ptr, num_row, 1); - for (int i = 0; i < num_row; i++) { - reordering[ordering[i]] = i; - } - strcat(buf, ".descend.ps"); - break; - } - - // ordering[i] : old -> new, new index with dreasing order of nonzro - - padding = count_padding(nonzeros, reordering, num_row, block_size); - - if(verbose) { - switch(method_ordering) { - case AMD: - printf("amd: "); - break; - case DESCEND: - printf("descending:"); - break; - } - printf("%d\n", padding); - } - reorder_csr_matrix(val_new, 
col_ind_new, row_ptr_new, - val, col_ind, row_ptr, - ordering, reordering, work, num_row); - - - if((fp = fopen(out_file, "w")) == NULL) { - exit(-1); - } - fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); - fprintf(fp, "%d %d %d\n", num_row, num_row, row_ptr_new[num_row]); - for (int i = 0; i < num_row; i++) { - for (int j = row_ptr_new[i]; j < row_ptr_new[i + 1]; j++) { - fprintf(fp, "%d %d %g\n", (i + 1), (col_ind_new[j] + 1), val_new[j]); - } - } - fclose(fp); - - if (graph_output) { - draw_csr(buf, row_ptr_new, col_ind_new, num_row); - } - - min_nonzeros = num_row; - mean_nonzeros = 0.0; - for (int i = 0; i < num_row; i++) { - mean_nonzeros += (double)nonzeros[i]; - if (min_nonzeros > nonzeros[i]) { - min_nonzeros = nonzeros[i]; - } - } - mean_nonzeros /= (double)num_row; - if (verbose) { - printf("max nonzeros = %d mean= %g min = %d\n", - max_nonzeros, mean_nonzeros, min_nonzeros); - } -} - -void print_CSR(char *st, - double *val, int *col_ind, int *row_ptr, int n) -{ - printf("[ %s ]\n", st); - for (int i = 0; i < n; i++) { - printf("%d : [%d] ", i, row_ptr[i + 1] - row_ptr[i]); - for (int k = row_ptr[i]; k < row_ptr[i + 1]; k++) { - printf(" %g:%d ", val[k], col_ind[k]); - } - printf("\n"); - } -} - - -void make_order_index(int *ordering, int *row_ptr, int n, int descend) -{ - int *slices, *slice_offset; - int mn; - - // find maximum nonzeros from all rows - mn = 0; - for (int i = 0; i < n; i++) { - int non_zeros = row_ptr[i + 1] - row_ptr[i]; - if (mn < non_zeros) { - mn = non_zeros; - } - } - // prepare working array : this suppose row without element - slices = (int *)malloc(sizeof(int) * (mn + 1)); - slice_offset = (int *)malloc(sizeof(int) * (mn + 1)); - for (int i = 0; i <= mn; i++) { - slices[i] = 0; - slice_offset[i] = 0; - } - // slices[i] keeps number of indices of rows whos width is i - for (int i = 0; i < n; i++) { - int non_zeros = row_ptr[i + 1] - row_ptr[i]; - slices[non_zeros]++; - } - // making blocks in decreasing order of 
nonzeros - if (descend) { - slice_offset[mn] = 0; - for (int i = mn - 1; i >= 0; i--) { - slice_offset[i] = slice_offset[i + 1] + slices[i + 1]; - } - } - else { - slice_offset[0] = 0; - for (int i = 0; i < mn; i++) { - slice_offset[i + 1] = slice_offset[i] + slices[i]; - } - } - - // this keeps original ordeing wihtin a block - for (int i = 0; i < n; i++) { - int non_zeros = row_ptr[i + 1] - row_ptr[i]; - ordering[i] = slice_offset[non_zeros]++; - } - - free(slices); - free(slice_offset); - -} - -int comp_int(const void *_a, const void *_b) { - // cast to deal with arguments defined as void * - int a = *(int *)_a; - int b = *(int *)_b; - - if (a < b) { - return -1; - } else if (a > b) { - return 1; - } - else { - return 0; - } -} - - -int comp_col_ind(const void *_a, const void *_b) { - // cast to deal with arguments defined as void * - int a = (*(csr_data *)_a).col_ind; - int b = (*(csr_data *)_b).col_ind; - - if (a < b) { - return -1; - } else if (a > b) { - return 1; - } - else { - return 0; - } -} - - -void draw_csr(char *buf, int *row_ptr, int *col_ind, int num_row) -{ - FILE *fp; - - if((fp = fopen(buf, "w")) == NULL) { - exit(-1); - } - fprintf(fp, "%%!PS-Adobe-3.0 EPSF-3.0\n%%%%BoundingBox: 5 5 395 395\n"); - fprintf(fp, "/rr { %g } def\n", 0.45 * 380.0 / (double)(num_row + 2)); - fprintf(fp, "/n { newpath } def\n"); - fprintf(fp, "/rl { rlineto } def\n"); - fprintf(fp, "/m { moveto } def\n"); - fprintf(fp,"n 10 10 m 380 0 rl 0 380 rl -380 0 rl 0 -380 rl closepath 0.85 setgray fill\n"); - for (int i = 0; i < num_row; i++) { - for (int j = row_ptr[i]; j < row_ptr[i + 1]; j++) { - fprintf(fp,"n %g %g rr 0 360 arc 0 setgray fill\n", - 10.0 + (double)col_ind[j] / (double)(num_row + 2) * 380.0, - 390.0 - (double)i / (double)(num_row + 2) * 380.0); - } - } - fprintf(fp, "showpage\n"); - fclose(fp); -} - - -int count_padding(int *nonzeros, int *reordering, int num_row, int block_size) -{ - // count artificial zeros - int padding = 0; - - for (int k = 0; k < 
num_row; k += block_size) { - int block_max = 0; - for (int j = 0; j < block_size; j++) { - if (k + j >= num_row) { - break; - } - int kj = reordering[k + j]; - if (block_max < nonzeros[kj]) { - block_max = nonzeros[kj]; - } - } - for (int j = 0; j < block_size; j++) { - if (k + j >= num_row) { - break; - } - int kj = reordering[k + j]; - padding += block_max - nonzeros[kj]; - } - } - - return padding; -} - -int countalignedRgCSR(int *row_ptr, int group_size, int n) -{ - int aligned_max; - - aligned_max = 0; - // find maximumn number of nonzeros in each group - for (int i = 0; i < n; i += group_size) { - int ntmp = 0; - for (int k = 0; k < group_size; k++) { - int ik = i + k; - if (ik >= n) { - break; - } - int mtmp = row_ptr[ik + 1] - row_ptr[ik]; - if (ntmp < mtmp) { - ntmp = mtmp; - } - } - aligned_max += ntmp * group_size; - } - return aligned_max; -} - -void makeRgCSR(double *val_new, int *col_ind_new, int *nonzeros, int *grp_ptr, - double *val, int *col_ind, int *row_ptr, int group_size, int n) -{ - int jtmp; - - jtmp = 0; - grp_ptr[0] = 0; - for (int i = 0; i < n; i+= group_size) { - int current_group = group_size; - if (i + group_size > n) { - current_group = n % group_size; - } - int ntmp = 0; - for (int k = 0; k < current_group; k++) { - int ik = i + k; - int mtmp = row_ptr[ik + 1] - row_ptr[ik]; - if (ntmp < mtmp) { - ntmp = mtmp; - } - } - int ig = i / group_size; - if (ig < (n / group_size + (n % group_size != 0) - 1)) { - grp_ptr[ig + 1] = grp_ptr[ig] + ntmp * group_size; - } - for (int j = 0; j < ntmp; j++) { - for (int k = 0; k < current_group; k++) { - int ik = i + k; - if (j < (row_ptr[ik + 1] - row_ptr[ik])) { - col_ind_new[jtmp] = col_ind[row_ptr[ik] + j]; - val_new[jtmp] = val[row_ptr[ik] + j]; - } - else { - col_ind_new[jtmp] = (-1); - val_new[jtmp] = 0.0; - } - jtmp++; - } // loop : k - } // loop : j - } - - for (int i = 0; i < n; i++) { - nonzeros[i] = row_ptr[i + 1] - row_ptr[i]; - } -} - -void SpMVCSR(double *y, double *x, double *val, - 
int *col_ind, int *row_ptr, int num_row) -{ - double stmp; - for (int i = 0; i < num_row; i++) { - stmp = 0.0; - for (int j = row_ptr[i]; j < row_ptr[i + 1]; j++) { - stmp += x[col_ind[j]] * val[j]; - } - y[i] = stmp; - } -} - -void SpMVRgCSR(double *y, double *x, - double *val, int *col_ind, int *nonzeros, int *grp_ptr, - int block_size, int n) -{ - int num_blocks = n / block_size + (n % block_size != 0); - for (int j = 0; j < num_blocks; j++) { - for (int k = 0; k < block_size; k++) { - int irow = j * block_size + k; - if (irow >= n) { - return; - } - int ptr = grp_ptr[j] + k; - int crnt_grp_size = block_size; - if ((j + 1) * block_size > n) { - crnt_grp_size = n % block_size; - } - double stmp = 0.0; - for (int i = 0; i < nonzeros[irow]; i++) { - stmp += val[ptr] * x[col_ind[ptr]]; - ptr += crnt_grp_size; - } - y[irow] = stmp; - } - } -} - -void reorder_csr_matrix(double *val_new, int *col_ind_new, int *row_ptr_new, - double *val, int *col_ind, int *row_ptr, - int *ordering, int *reordering, csr_data *work, - int num_row) -{ - // csr_data *work is allocated as max_j (row_ptr[j + 1] - row_ptr[j]) sized - int jtmp = 0; - row_ptr_new[0] = 0; - for (int i = 0; i < num_row; i++) { - int j = reordering[i]; - int ktmp = 0; - for (int k = row_ptr[j]; k < row_ptr[j + 1]; k++) { - work[ktmp].col_ind = ordering[col_ind[k]]; - work[ktmp].val = val[k]; - ktmp++; - } - int itmp = row_ptr[j + 1] - row_ptr[j]; - qsort(work, itmp, sizeof(csr_data), comp_col_ind); - ktmp = 0; - for (int k = 0; k < itmp; k++) { - val_new[jtmp] = work[k].val; - col_ind_new[jtmp] = work[k].col_ind; - jtmp++; - } - row_ptr_new[i + 1] = jtmp; - } -} diff --git a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cpp b/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cpp deleted file mode 100644 index 6d543d7c93bf817ce83d267d735b6fea732ee231..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cpp +++ /dev/null @@ -1,11 +0,0 @@ 
-/*************************************************************************** - matrix-solvers-benchmark.cpp - description - ------------------- - begin : Jan 8, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "matrix-solvers-benchmark.h" diff --git a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cu b/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cu deleted file mode 100644 index 5afd986ff0c0224bc51e40b3da4ff18da64ee0eb..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.cu +++ /dev/null @@ -1,11 +0,0 @@ -/*************************************************************************** - matrix-solvers-benchmark.cu - description - ------------------- - begin : Oct 20, 2012 - copyright : (C) 2012 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include "matrix-solvers-benchmark.h" diff --git a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h b/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h deleted file mode 100644 index 1bb3dfd96c4d807af244c5f3424990b9278f1824..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/matrix-solvers-benchmark.h +++ /dev/null @@ -1,405 +0,0 @@ -/*************************************************************************** - matrix-solvers-benchmark.h - description - ------------------- - begin : Jan 8, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef MATRIXSOLVERSBENCHMARK_H_ -#define MATRIXSOLVERSBENCHMARK_H_ - -#include <fstream> -#include <TNL/File.h> -#include <TNL/Object.h> 
-#include <TNL/Devices/Cuda.h> -#include <TNL/Exceptions/CudaSupportMissing.h> -#include <TNL/Config/ConfigDescription.h> -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Matrices/CSR.h> -#include <TNL/legacy/matrices/tnlRgCSR.h> -#include <TNL/Solvers/IterativeSolverMonitor.h> -#include <TNL/Solvers/Linear/stationary/SOR.h> -#include <TNL/Solvers/Linear/CG.h> -#include <TNL/Solvers/Linear/BICGStab.h> -#include <TNL/Solvers/Linear/GMRES.h> -#include <TNL/Solvers/Linear/TFQMR.h> -#ifdef HAVE_PETSC - #include <petsc.h> -#endif - -#include "tnlConfig.h" -const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-matrix-solvers-benchmark.cfg.desc"; - -void writeTestFailToLog( const Config::ParameterContainer& parameters ) -{ - const String& logFileName = parameters. getParameter< String >( "log-file" ); - std::fstream logFile; - if( logFileName != "" ) - { - logFile. open( logFileName. getString(), std::ios::out | std::ios::app ); - if( ! logFile ) - std::cerr << "Unable to open the log file " << logFileName << std::endl; - else - { - String bgColor( "#FF0000" ); - logFile << " <td bgcolor=" << bgColor << "> N/A </td> " << std::endl - << " <td bgcolor=" << bgColor << "> N/A </td> " << std::endl - << " <td bgcolor=" << bgColor << "> N/A </td> " << std::endl; - logFile. close(); - } - } -} - -template< typename Solver, typename Matrix, typename Vector > -bool benchmarkSolver( const Config::ParameterContainer& parameters, - Solver& solver, - const Matrix& matrix, - const Vector& b, - Vector& x ) -{ - typedef typename Matrix :: RealType RealType; - typedef typename Matrix :: DeviceType DeviceType; - typedef typename Matrix :: IndexType IndexType; - - const RealType& maxResidue = parameters. getParameter< double >( "max-residue" ); - const IndexType& size = matrix. getRows(); - const IndexType nonZeros = matrix. 
getNumberOfMatrixElements(); - //const IndexType maxIterations = size * ( ( double ) size * size / ( double ) nonZeros ); - const IndexType maxIterations = size; - std::cout << "Setting max. number of iterations to " << maxIterations << std::endl; - - solver. setMatrix( matrix ); - solver. setMaxIterations( maxIterations ); - solver. setMaxResidue( maxResidue ); - solver. setMinResidue( 1.0e9 ); - IterativeSolverMonitor< RealType, IndexType > solverMonitor; - solver. setSolverMonitor( solverMonitor ); - solver. setRefreshRate( 10 ); - solverMonitor. resetTimers(); - solver. solve( b, x ); - - bool solverConverged( solver. getResidue() < maxResidue ); - const String& logFileName = parameters. getParameter< String >( "log-file" ); - std::fstream logFile; - if( logFileName != "" ) - { - logFile. open( logFileName. getString(), std::ios::out | std::ios::app ); - if( ! logFile ) - std::cerr << "Unable to open the log file " << logFileName << std::endl; - else - { - String bgColor( "#FF0000" ); - if( solver. getResidue() < 1 ) - bgColor="#FF8888"; - if( solver. getResidue() < maxResidue ) - { - bgColor="#88FF88"; - } - double cpuTime = solverMonitor. getCPUTime(); - double realTime = solverMonitor. getRealTime(); - logFile << " <td bgcolor=" << bgColor << "> " << solver. getResidue() << " </td> " << std::endl - << " <td bgcolor=" << bgColor << "> " << solver. getIterations() << " </td> " << std::endl - << " <td bgcolor=" << bgColor << "> " << cpuTime << " </td> " << std::endl; - logFile. close(); - } - } - return solverConverged; - -} - -template< typename Matrix, typename Vector > -bool benchmarkMatrixOnDevice( const Config::ParameterContainer& parameters, - const Matrix& matrix, - const Vector& b, - Vector& x ) -{ - typedef typename Matrix :: RealType RealType; - typedef typename Matrix :: DeviceType DeviceType; - typedef typename Matrix :: IndexType IndexType; - - const String& solverClass = parameters. 
getParameter< String >( "solver-class" ); - if( solverClass == "tnl" ) - { - const String& solverName = parameters. getParameter< String >( "solver-name" ); - IndexType iterations( 0 ); - RealType residue( 0.0 ); - bool converged( false ); - if( solverName == "sor" ) - { - SOR< Matrix > solver; - const RealType& sorOmega = parameters. getParameter< double >( "sor-omega" ); - solver. setOmega( sorOmega ); - return benchmarkSolver( parameters, solver, matrix, b, x ); - } - if( solverName == "cg" ) - { - CG< Matrix > solver; - return benchmarkSolver( parameters, solver, matrix, b, x ); - } - if( solverName == "bicgstab" ) - { - BICGStab< Matrix > solver; - return benchmarkSolver( parameters, solver, matrix, b, x ); - } - if( solverName == "gmres" ) - { - GMRES< Matrix > solver; - const IndexType& gmresRestarting = parameters. getParameter< int >( "gmres-restarting" ); - solver. setRestarting( gmresRestarting ); - return benchmarkSolver( parameters, solver, matrix, b, x ); - } - if( solverName == "tfqmr" ) - { - TFQMR< Matrix > solver; - return benchmarkSolver( parameters, solver, matrix, b, x ); - } - std::cerr << "Unknown solver " << solverName << std::endl; - return false; - } - if( solverClass == "petsc" ) - { -#ifndef HAVE_PETSC - std::cerr << "PETSC is not installed on this system." << std::endl; - writeTestFailToLog( parameters ); - return false; -#else - if( DeviceType :: getDeviceType() != "Devices::Host" ) - { - std::cerr << "PETSC tests can run only on host. The current device is " << DeviceType :: getDeviceType() << std::endl; - writeTestFailToLog( parameters ); - return false; - } - /**** - * Set-up the PETSC matrix - */ - const IndexType n = matrix. getSize(); - Mat A; - MatCreate( PETSC_COMM_WORLD, &A ); - MatSetType( A, MATAIJ ); - MatSetSizes( A, PETSC_DECIDE, PETSC_DECIDE, n, n ); - MatSetUp( A ); - - /**** - * Inserting data - */ - Array< PetscScalar > petscVals; - Array< PetscInt > petscCols; - petscVals. setSize( n ); - petscCols. 
setSize( n ); - for( IndexType i = 0; i < n; i ++ ) - { - const IndexType rowLength = matrix. getRowLength( i ); - for( IndexType j = 0; j < rowLength; j ++ ) - { - petscVals. setElement( j, matrix. getRowValues( i )[ j ] ); - petscCols. setElement( j, matrix. getRowColumnIndexes( i )[ j ] ); - } - MatSetValues( A, - 1, // setting one row - &i, // index of thew row - rowLength, - petscCols. getData(), - petscVals. getData(), - INSERT_VALUES ); - } - MatAssemblyBegin( A, MAT_FINAL_ASSEMBLY ); - MatAssemblyEnd( A, MAT_FINAL_ASSEMBLY ); - - /**** - * Check matrix conversion - */ - /*for( IndexType i = 0; i < n; i++ ) - for( IndexType j = 0; j < n; j ++ ) - { - PetscScalar value; - MatGetValues( A, 1, &i, 1, &j, &value ); - if( matrix. getElement( i, j ) != value ) - { - std::cerr << "Conversion to PETSC matrix was not correct at position " << i << " " << j << "." << std::endl; - std::cerr << "Values are " << value << " and " << matrix. getElement( i, j ) << std::endl; - return false; - } - } - std::cerr << "PETSC CONVERSION WAS OK!!!" << std::endl; - return true;*/ - - Vec petscB, petscX; - KSP ksp; - KSPCreate( PETSC_COMM_WORLD, &ksp ); - - -#endif - } - -} - - -template< typename Real, typename Index > -bool benchmarkMatrix( const Config::ParameterContainer& parameters ) -{ - /**** - * Loading the matrix from the input file - */ - typedef CSR< Real, Devices::Host, Index > csrMatrixType; - String inputFile = parameters. getParameter< String >( "input-file" ); - csrMatrixType csrMatrix; - if( ! csrMatrix. load( inputFile ) ) - { - std::cerr << "Unable to load file " << inputFile << std::endl; - return false; - } - - /**** - * Writing matrix statistics - */ - String matrixStatsFileName = parameters. getParameter< String >( "matrix-stats-file" ); - if( matrixStatsFileName ) - { - std::fstream matrixStatsFile; - matrixStatsFile. open( matrixStatsFileName. getString(), std::ios::out ); - if( ! 
matrixStatsFile ) - { - std::cerr << "Unable to open matrix statistics file " << matrixStatsFileName << std::endl; - return false; - } - matrixStatsFile << " <td> " << csrMatrix. getRows() << " </td> " << std::endl - << " <td> " << csrMatrix. getNumberOfMatrixElements() << " </td> " << std::endl; - matrixStatsFile. close(); - } - - /**** - * Setting up the linear problem - */ - const Index size = csrMatrix. getRows(); - std::cout << "Matrix size is " << size << std::endl; - Vector< Real, Devices::Host, Index > x1( "matrix-solvers-benchmark:x1" ); - Vector< Real, Devices::Host, Index > x( "matrix-solvers-benchmark:x" ); - Vector< Real, Devices::Host, Index > b( "matrix-solvers-benchmark:b" ); - if( ! x1. setSize( size ) || - ! x. setSize( size ) || - ! b. setSize( size ) ) - { - std::cerr << "Sorry, I do not have enough memory for the benchmark." << std::endl; - return false; - } - x1. setValue( ( Real ) 1.0 ); - x. setValue( ( Real ) 0.0 ); - csrMatrix. vectorProduct( x1, b ); - - const String device = parameters. getParameter< String >( "device" ); - if( device == "host" ) - if( ! benchmarkMatrixOnDevice( parameters, csrMatrix, b, x ) ) - return false; - - if( device == "cuda" ) - { -#ifdef HAVE_CUDA - tnlRgCSR< Real, Devices::Cuda, Index > rgCSR( "matrix-solvers-benchmark:rgCSR" ); - // FIX THIS - //rgCSR = csrMatrix; - /*Vector< Real, Devices::Cuda, Index > cudaX( "matrix-solvers-benchmark:cudaX" ); - Vector< Real, Devices::Cuda, Index > cudaB( "matrix-solvers-benchmark:cudaB" ); - cudaX. setLike( x ); - cudaX = x; - cudaB. setLike( b ); - cudaB = b; - if( ! benchmarkMatrixOnDevice( parameters, rgCSR, cudaB, cudaX ) ) - return false; - x = cudaX;*/ -#else - throw Exceptions::CudaSupportMissing(); -#endif - } - - std::cout << std::endl << "L1 diff. norm = " << x. differenceLpNorm( x1, ( Real ) 1.0 ) - << " L2 diff. norm = " << x. differenceLpNorm( x1, ( Real ) 2.0 ) - << " Max. diff. norm = " << x. 
differenceMax( x1 ) << std::endl; - return true; -} - -int main( int argc, char* argv[] ) -{ -#ifdef HAVE_PETSC - PetscInitialize( &argc, &argv, ( char* ) 0, ( char* ) 0 ); -#endif - /**** - * Parsing command line arguments ... - */ - Config::ParameterContainer parameters; - Config::ConfigDescription conf_desc; - - if( conf_desc.parseConfigDescription( configFile ) != 0 ) - return 1; - if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) - { - conf_desc.printUsage( argv[ 0 ] ); - return 1; - } - String inputFile = parameters. getParameter< String >( "input-file" ); - String str_input_mtx_file = parameters. getParameter< String >( "input-mtx-file" ); - String log_file_name = parameters. getParameter< String >( "log-file" ); - double stop_time = parameters. getParameter< double >( "stop-time" ); - int verbose = parameters. getParameter< int >( "verbose"); - - /**** - * Checking a type of the input data - */ - String objectType; - if( ! getObjectType( inputFile, objectType ) ) - { - std::cerr << "Unable to detect object type in " << inputFile << std::endl; - return EXIT_FAILURE; - } - Containers::List< String > parsedObjectType; - parseObjectType( objectType, - parsedObjectType ); - String objectClass = parsedObjectType[ 0 ]; - if( objectClass != "CSR" ) - { - std::cerr << "I am sorry, I am expecting CSR in the input file but I found " << objectClass << "." << std::endl; - return EXIT_FAILURE; - } - - String precision = parsedObjectType[ 1 ]; - //String indexing = parsedObjectType[ 3 ]; - if( precision == "float" ) - if( ! benchmarkMatrix< float, int >( parameters ) ) - { -#ifdef HAVE_PETSC - PetscFinalize(); -#endif - return EXIT_FAILURE; - } - - if( precision == "double" ) - if( ! benchmarkMatrix< double, int >( parameters ) ) - { -#ifdef HAVE_PETSC - PetscFinalize(); -#endif - return EXIT_FAILURE; - } - - std::fstream log_file; - if( log_file_name ) - { - log_file. open( log_file_name. getString(), std::ios::out | std::ios::app ); - if( ! 
log_file ) - { - std::cerr << "Unable to open log file " << log_file_name << " for appending logs." << std::endl; - return EXIT_FAILURE; - } - std::cout << "Writing to log file " << log_file_name << "..." << std::endl; - } -#ifdef HAVE_PETSC - PetscFinalize(); -#endif - return EXIT_SUCCESS; - -} - - -#endif /* MATRIXSOLVERSBENCHMARK_H_ */ diff --git a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cpp b/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cpp deleted file mode 100644 index 0811d90de8825194b92c492ff0f19c61d75a03d6..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cpp +++ /dev/null @@ -1,12 +0,0 @@ -/*************************************************************************** - sparse-matrix-benchmark.cpp - description - ------------------- - begin : Jul 27, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - - -#include "sparse-matrix-benchmark.h" diff --git a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cu b/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cu deleted file mode 100644 index 524aca53bd8ff5d7db254032a90e9dde3dc1edee..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.cu +++ /dev/null @@ -1,12 +0,0 @@ -/*************************************************************************** - sparse-matrix-benchmark.cu - description - ------------------- - begin : Jul 27, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - - -#include "sparse-matrix-benchmark.h" diff --git a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.h b/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.h deleted file mode 100644 index 
10bc59794c898aa987b35b8b640eec778d21594d..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/sparse-matrix-benchmark.h +++ /dev/null @@ -1,416 +0,0 @@ -/*************************************************************************** - sparse-matrix-benchmark.h - description - ------------------- - begin : Jul 27, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef SPARSEMATRIXBENCHMARK_H_ -#define SPARSEMATRIXBENCHMARK_H_ - -#include <fstream> -#include <iomanip> -#include <TNL/Config/ConfigDescription.h> -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Matrices/Dense.h> -#include <TNL/Matrices/Ellpack.h> -#include <TNL/Matrices/SlicedEllpack.h> -#include <TNL/Matrices/ChunkedEllpack.h> -#include <TNL/Matrices/CSR.h> -#include <TNL/Matrices/MatrixReader.h> -#include <TNL/Math.h> -#include "tnlSpmvBenchmark.h" - -#include "tnlConfig.h" -const char configFile[] = TNL_CONFIG_DIRECTORY "tnl-sparse-matrix-benchmark.cfg.desc"; - - -/* -double bestCudaRgCSRGflops( 0 ); - -template< typename Real > -void benchmarkRgCSRFormat( const CSR< Real, Devices::Host, int >& csrMatrix, - const Vector< Real, Devices::Host >& refX, - const Vector< Real, Devices::Cuda >& cudaX, - Vector< Real, Devices::Host >& refB, - bool formatTest, - const int maxIterations, - const bool useAdaptiveGroupSize, - const tnlAdaptiveGroupSizeStrategy adaptiveGroupSizeStrategy, - const tnlSpmvBenchmarkCSR< Real, int >& csrMatrixBenchmark, - bool verbose, - const String& inputMtxFile, - const String& logFileName, - std::fstream& logFile ) -{ - tnlSpmvBenchmarkRgCSR< Real, Devices::Host, int > hostRgCsrMatrixBenchmark; - for( int groupSize = 16; groupSize <= 64; groupSize *= 2 ) - { - - hostRgCsrMatrixBenchmark. setGroupSize( groupSize ); - hostRgCsrMatrixBenchmark. 
setUseAdaptiveGroupSize( useAdaptiveGroupSize ); - hostRgCsrMatrixBenchmark. setAdaptiveGroupSizeStrategy( adaptiveGroupSizeStrategy ); - hostRgCsrMatrixBenchmark. setup( csrMatrix ); - if( formatTest ) - hostRgCsrMatrixBenchmark. testMatrix( csrMatrix, verbose ); - hostRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - //hostRgCsrMatrixBenchmark. runBenchmark( refX, refB, verbose ); - hostRgCsrMatrixBenchmark. tearDown(); - - if( logFileName ) - hostRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFile, - csrMatrix, - true ); - - tnlSpmvBenchmarkRgCSR< Real, Devices::Cuda, int > cudaRgCsrMatrixBenchmark; - cudaRgCsrMatrixBenchmark. setGroupSize( groupSize ); - cudaRgCsrMatrixBenchmark. setup( csrMatrix ); - cudaRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - for( int cudaBlockSize = 32; cudaBlockSize <= 256; cudaBlockSize *= 2 ) - { - cudaRgCsrMatrixBenchmark. setCudaBlockSize( cudaBlockSize ); - if( formatTest ) - cudaRgCsrMatrixBenchmark. testMatrix( csrMatrix, verbose ); - cudaRgCsrMatrixBenchmark. runBenchmark( cudaX, refB, verbose ); - if( logFileName ) - cudaRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFile, - csrMatrix, - false ); - bestCudaRgCSRGflops = max( bestCudaRgCSRGflops, cudaRgCsrMatrixBenchmark. getGflops() ); - } - cudaRgCsrMatrixBenchmark. tearDown(); - } -} -*/ - -template< typename RealType > -bool benchmarkMatrix( const Config::ParameterContainer& parameters ) -{ - /**** - * Read the CSR matrix ... 
- */ - typedef CSR< RealType, Devices::Host, int > CsrMatrix; - CsrMatrix csrMatrix; - - const String& inputFileName = parameters.getParameter< String >( "input-file" ); - const String& inputMtxFileName = parameters.getParameter< String >( "input-mtx-file" ); - const String& logFileName = parameters.getParameter< String >( "log-file" ); - const String& pdfFileName = parameters.getParameter< String >( "pdf-file" ); - bool verbose = parameters.getParameter< bool >( "verbose" ); - const int maxIterations = parameters.getParameter< int >( "max-iterations" ); - - std::fstream inputFile; - inputFile.open( inputMtxFileName.getString(), std::ios::in ); - if( ! inputFile ) - { - std::cerr << "I am not able to open the file " << inputMtxFileName << "." << std::endl; - return false; - } - if( ! MatrixReader< CsrMatrix >::readMtxFile( inputFile, csrMatrix ) ) - return false; - - /**** - * Check the number of the non-zero elements - */ - const long int nonzeroElements = csrMatrix. getNumberOfNonzeroMatrixElements(); - if( verbose ) - std::cout << "Matrix rows: " << csrMatrix.getRows() - << " Matrix columns: " << csrMatrix.getColumns() - << " Non-zero elements: " << nonzeroElements << std::endl; - - const long int rows = csrMatrix.getRows(); - const long int columns = csrMatrix.getColumns(); - Vector< RealType, Devices::Host > refX( "ref-x", columns ), refB( "ref-b", rows ); - Vector< RealType, Devices::Cuda > cudaX( "cudaX", columns ); - refX. setValue( 0.0 ); - for( int i = 0; i < columns; i ++ ) - refX[ i ] = 1.0; //( Real ) i * 1.0 / ( Real ) size; - cudaX = refX; - csrMatrix. vectorProduct( refX, refB ); - - /**** - * CSR format benchmark - */ - tnlSpmvBenchmark< CSR< RealType, Devices::Host, int > > csrMatrixBenchmark; - - /**** - * Use the first instance of tnlSpmvBenchmark which we have - * to write the progress-table header. - */ - if( verbose ) - csrMatrixBenchmark. writeProgressTableHeader(); - - csrMatrixBenchmark. setup( csrMatrix ); - csrMatrixBenchmark. 
setMaxIterations( maxIterations ); - csrMatrixBenchmark. runBenchmark( refX, refB, verbose ); - csrMatrixBenchmark. tearDown(); - - /**** - * Open and write one line to the log file - */ - std::fstream logFile; - if( logFileName ) - { - logFile. open( logFileName. getString(), std::ios::out | std::ios::app ); - if( ! logFile ) - { - std::cerr << "Unable to open log file " << logFileName << " for appending logs." << std::endl; - return false; - } - /**** - * Open new line of the table and write basic matrix information - */ - long int allElements = csrMatrix. getRows() * csrMatrix. getColumns(); - logFile << " <tr>" << std::endl; - logFile << " <td> <a href=\"" << pdfFileName << "\">" << inputFile << "</a> </td>" << std::endl; - logFile << " <td> " << csrMatrix. getRows() << "</td>" << std::endl; - logFile << " <td> " << nonzeroElements << "</td>" << std::endl; - logFile << " <td> " << ( double ) nonzeroElements / allElements * 100.0 << "</td>" << std::endl; - csrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFileName, - csrMatrix, - false ); - } - -#ifdef UNDEF - /**** - * Cusparse CSR format benchmark - */ - tnlSpmvBenchmarkCusparseCSR< Real, int > cusparseCSRBenchmark; - cusparseCSRBenchmark. setup( csrMatrix ); - cusparseCSRBenchmark. setMaxIterations( maxIterations ); - cusparseCSRBenchmark. runBenchmark( cudaX, refB, verbose ); - cusparseCSRBenchmark. tearDown(); - - if( logFileName ) - cusparseCSRBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFile, - csrMatrix, - true ); - - /**** - * Hybrid format benchmark - */ - tnlSpmvBenchmarkHybridMatrix< Real, int > hybridMatrixBenchmark; - hybridMatrixBenchmark. setFileName( inputMtxFile ); - hybridMatrixBenchmark. setup( csrMatrix ); - hybridMatrixBenchmark. setMaxIterations( maxIterations ); - hybridMatrixBenchmark. setNonzeroElements( nonzeroElements ); - hybridMatrixBenchmark. runBenchmark( refX, refB, verbose ); - hybridMatrixBenchmark. 
tearDown(); - - if( logFileName ) - { - hybridMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFile, - csrMatrix, - false ); - } - - /**** - * Row-Grouped CSR format - */ - bestCudaRgCSRGflops = 0.0; - benchmarkRgCSRFormat( csrMatrix, - refX, - cudaX, - refB, - formatTest, - maxIterations, - false, - tnlAdaptiveGroupSizeStrategyByAverageRowSize, - csrMatrixBenchmark, - verbose, - inputMtxFile, - logFileName, - logFile ); - - tnlSpmvBenchmarkRgCSR< Real, Devices::Host, int > hostRgCsrMatrixBenchmark; - hostRgCsrMatrixBenchmark. setGroupSize( 16 ); - hostRgCsrMatrixBenchmark. setUseAdaptiveGroupSize( true ); - hostRgCsrMatrixBenchmark. setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByAverageRowSize ); - hostRgCsrMatrixBenchmark. setup( csrMatrix ); - if( formatTest ) - hostRgCsrMatrixBenchmark. testMatrix( csrMatrix, verbose ); - hostRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - //hostRgCsrMatrixBenchmark. runBenchmark( refX, refB, verbose ); - hostRgCsrMatrixBenchmark. tearDown(); - if( logFileName ) - hostRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxFile, - csrMatrix, - true ); - tnlSpmvBenchmarkRgCSR< Real, Devices::Cuda, int > cudaRgCsrMatrixBenchmark; - for( int cudaBlockSize = 32; cudaBlockSize <= 256; cudaBlockSize *= 2 ) - { - cudaRgCsrMatrixBenchmark. setCudaBlockSize( cudaBlockSize ); - cudaRgCsrMatrixBenchmark. setGroupSize( 16 ); - cudaRgCsrMatrixBenchmark. setUseAdaptiveGroupSize( true ); - cudaRgCsrMatrixBenchmark. setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByAverageRowSize ); - cudaRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - cudaRgCsrMatrixBenchmark. setup( csrMatrix ); - if( formatTest ) - cudaRgCsrMatrixBenchmark. testMatrix( csrMatrix, verbose ); - cudaRgCsrMatrixBenchmark. runBenchmark( cudaX, refB, verbose ); - if( logFileName ) - cudaRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. 
getGflops(), - inputMtxFile, - csrMatrix, - false ); - } - cudaRgCsrMatrixBenchmark. tearDown(); - - /**** - * Row-Grouped CSR format with reordered rows - * The rows are now sorted decreasingly by the number of the nonzero elements - */ - if( verbose ) - std::cout << " ------------------------------- Test with sorted matrix ---------------------------------- " << std::endl; - - Vector< int, Devices::Host > rowPermutation( "rowPermutation" ); - { - CSR< Real, Devices::Host > orderedCsrMatrix( "orderedCsrMatrix" ); - csrMatrix. sortRowsDecreasingly( rowPermutation ); - - /**** - * Check if the ordering is OK. - */ - int rowSize = csrMatrix. getNonzeroElementsInRow( rowPermutation[ 0 ] ); - for( int i = 1; i < csrMatrix. getSize(); i ++ ) - { - if( rowSize < csrMatrix. getNonzeroElementsInRow( rowPermutation[ i ] ) ) - { - std::cerr << "The rows are not sorted properly. Error is at row number " << i << std::endl; - } - rowSize = csrMatrix. getNonzeroElementsInRow( rowPermutation[ i ] ); - } - orderedCsrMatrix. reorderRows( rowPermutation, csrMatrix ); - orderedCsrMatrix. vectorProduct( refX, refB ); - benchmarkRgCSRFormat( orderedCsrMatrix, - refX, - cudaX, - refB, - formatTest, - maxIterations, - false, - tnlAdaptiveGroupSizeStrategyByAverageRowSize, - csrMatrixBenchmark, - verbose, - inputMtxSortedFile, - logFileName, - logFile ); - - tnlSpmvBenchmarkRgCSR< Real, Devices::Host, int > hostRgCsrMatrixBenchmark; - hostRgCsrMatrixBenchmark. setGroupSize( 16 ); - hostRgCsrMatrixBenchmark. setUseAdaptiveGroupSize( true ); // TODO: fix with true - not implemented yet - hostRgCsrMatrixBenchmark. setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByFirstGroup ); - hostRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - hostRgCsrMatrixBenchmark. setup( orderedCsrMatrix ); - if( formatTest ) - hostRgCsrMatrixBenchmark. testMatrix( orderedCsrMatrix, verbose ); - //hostRgCsrMatrixBenchmark. runBenchmark( refX, refB, verbose ); - hostRgCsrMatrixBenchmark. 
tearDown(); - if( logFileName ) - hostRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxSortedFile, - csrMatrix, - true ); - for( int cudaBlockSize = 32; cudaBlockSize <= 256; cudaBlockSize *= 2 ) - { - tnlSpmvBenchmarkRgCSR< Real, Devices::Cuda, int > cudaRgCsrMatrixBenchmark; - cudaRgCsrMatrixBenchmark. setCudaBlockSize( cudaBlockSize ); - cudaRgCsrMatrixBenchmark. setGroupSize( 16 ); - cudaRgCsrMatrixBenchmark. setUseAdaptiveGroupSize( true ); - cudaRgCsrMatrixBenchmark. setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByFirstGroup ); - cudaRgCsrMatrixBenchmark. setup( orderedCsrMatrix ); - cudaRgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - - if( formatTest ) - cudaRgCsrMatrixBenchmark. testMatrix( orderedCsrMatrix, verbose ); - cudaRgCsrMatrixBenchmark. runBenchmark( cudaX, refB, verbose ); - if( logFileName ) - cudaRgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. getGflops(), - inputMtxSortedFile, - csrMatrix, - false ); - } - cudaRgCsrMatrixBenchmark. tearDown(); - } - csrMatrix. vectorProduct( refX, refB ); - - /**** - * Adaptive Row-Grouped CSR format - */ - - for( int desiredChunkSize = 1; desiredChunkSize <= 32; desiredChunkSize *= 2 ) - { - tnlSpmvBenchmarkAdaptiveRgCSR< Real, Devices::Cuda, int > cudaArgCsrMatrixBenchmark; - cudaArgCsrMatrixBenchmark. setDesiredChunkSize( desiredChunkSize ); - for( int cudaBlockSize = 32; cudaBlockSize <= 256; cudaBlockSize *= 2 ) - { - cudaArgCsrMatrixBenchmark. setCudaBlockSize( cudaBlockSize ); - cudaArgCsrMatrixBenchmark. setup( csrMatrix ); - if( formatTest ) - cudaArgCsrMatrixBenchmark. testMatrix( csrMatrix, verbose ); - cudaArgCsrMatrixBenchmark. setMaxIterations( maxIterations ); - cudaArgCsrMatrixBenchmark. runBenchmark( cudaX, refB, verbose ); - cudaArgCsrMatrixBenchmark. setBestRgCSRGflops( bestCudaRgCSRGflops ); - if( logFileName ) - cudaArgCsrMatrixBenchmark. writeToLogTable( logFile, - csrMatrixBenchmark. 
getGflops(), - inputMtxFile, - csrMatrix, - true ); - } - cudaRgCsrMatrixBenchmark. tearDown(); - } - -#endif - - - if( logFileName ) - { - logFile << " </tr>" << std::endl; - logFile. close(); - } - return true; - -} - -int main( int argc, char* argv[] ) -{ - Config::ParameterContainer parameters; - Config::ConfigDescription conf_desc; - - if( conf_desc.parseConfigDescription( configFile ) != 0 ) - return 1; - if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) - { - conf_desc.printUsage( argv[ 0 ] ); - return 1; - } - const String& precision = parameters.getParameter< String >( "precision" ); - if( precision == "float" ) - if( ! benchmarkMatrix< float >( parameters ) ) - return EXIT_FAILURE; - if( precision == "double" ) - if( ! benchmarkMatrix< double >( parameters ) ) - return EXIT_FAILURE; - return EXIT_SUCCESS; -} - -#endif /* SPARSEMATRIXBENCHMARK_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmark.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmark.h deleted file mode 100644 index 70f3d1fc3a234858f39c3106040f5bace3aca6f0..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmark.h +++ /dev/null @@ -1,47 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmark.h - description - ------------------- - begin : Dec 29, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARK_H_ -#define TNLSPMVBENCHMARK_H_ - -#include "tnlSpmvBenchmarkBase.h" -#include <TNL/Matrices/CSR.h> - - -template< typename Matrix > -class tnlSpmvBenchmark -{ -}; - -template< typename Real, typename Device, typename Index > -class tnlSpmvBenchmark< CSR< Real, Device, Index > > : public tnlSpmvBenchmarkBase< CSR< Real, Device, Index > > -{ - public: - - typedef Real RealType; - typedef Device DeviceType; - typedef 
Index IndexType; - - bool setup( const CSR< RealType, Devices::Host, IndexType >& matrix ); - - void tearDown(); - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< RealType, Devices::Host, IndexType >& csrMatrix, - bool writeMatrixInfo ) const; -}; - -#include "tnlSpmvBenchmark_impl.h" - -#endif /* TNLSPMVBENCHMARK_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h deleted file mode 100644 index 35a5b388c9b317d3fbf4292337814b76d6d7122b..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkAdaptiveRgCSRMatrix.h +++ /dev/null @@ -1,261 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmarkAdaptiveRgCSR.h - description - ------------------- - begin : May 15, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKADAPTIVERGCSRMATRIX_H_ -#define TNLSPMVBENCHMARKADAPTIVERGCSRMATRIX_H_ - -#include "tnlSpmvBenchmark.h" - -#include <TNL/Assert.h> -#include <TNL/Exceptions/CudaSupportMissing.h> - -template< typename Real, typename Device, typename Index> -class tnlSpmvBenchmarkAdaptiveRgCSR : public tnlSpmvBenchmark< Real, Device, Index, tnlAdaptiveRgCSR > -{ - public: - - tnlSpmvBenchmarkAdaptiveRgCSR(); - - bool setup( const CSR< Real, Devices::Host, Index >& matrix ); - - void tearDown(); - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const; - - void setDesiredChunkSize( const Index desiredChunkSize ); - - void setCudaBlockSize( const Index 
cudaBlockSize ); - - Index getArtificialZeroElements() const; - - void setBestRgCSRGflops( const double& bestRgCSRGflops ); - - protected: - - /**** - * This is helper method for generating HTML table with benchmark results - */ - String getBgColorByRgCSRSpeedUp( const double& speedUp ) const; - - Index desiredChunkSize; - - Index cudaBlockSize; - - bool useAdaptiveGroupSize; - - tnlAdaptiveGroupSizeStrategy adaptiveGroupSizeStrategy; - - double bestRgCSRGflops; -}; - -template< typename Real, - typename Device, - typename Index> -tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: tnlSpmvBenchmarkAdaptiveRgCSR() - : desiredChunkSize( 4 ), - cudaBlockSize( 32 ), - useAdaptiveGroupSize( false ), - adaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByAverageRowSize ), - bestRgCSRGflops( 0.0 ) - -{ -} - -template< typename Real, - typename Device, - typename Index> -bool tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: setup( const CSR< Real, Devices::Host, Index >& matrix ) -{ - //TNL_ASSERT( this->groupSize > 0, std::cerr << "groupSize = " << this->groupSize ); - if( Device :: getDevice() == Devices::HostDevice ) - { - this->matrix. tuneFormat( desiredChunkSize, cudaBlockSize ); - if( ! this->matrix. copyFrom( matrix ) ) - return false; - //matrix. printOut(std::cout, "text", 30 ); - //this->matrix. printOut(std::cout, "text", 30 ); - } - if( Device :: getDevice() == Devices::CudaDevice ) - { -#ifdef HAVE_CUDA - tnlAdaptiveRgCSR< Real, Devices::Host, Index > hostMatrix( "tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: setup : hostMatrix" ); - hostMatrix. tuneFormat( desiredChunkSize, cudaBlockSize ); - hostMatrix. copyFrom( matrix ); - if( ! this->matrix. copyFrom( hostMatrix ) ) - return false; -#else - return false; -#endif - } - this->setupOk = true; - return true; -} - -template< typename Real, - typename Device, - typename Index> -void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: tearDown() -{ - //this->matrix. 
setSize( 0 ); - //this->matrix. setNonzeroElements( 0 ); -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: writeProgress() const -{ - std::cout << left << std::setw( this->formatColumnWidth - 15 ) << "Adap. Row-grouped CSR "; - if( Device :: getDevice() == Devices::CudaDevice ) - std::cout << std::setw( 5 ) << this->desiredChunkSize - << std::setw( 10 ) << this->cudaBlockSize; - else - std::cout << std::setw( 15 ) << this->desiredChunkSize; - std::cout << right << std::setw( this->timeColumnWidth ) << std::setprecision( 2 ) << this->getTime() - << right << std::setw( this->iterationsColumnWidth ) << this->getIterations() - << right << std::setw( this->gflopsColumnWidth ) << std::setprecision( 2 ) << this->getGflops(); - if( this->getBenchmarkWasSuccesful() ) - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " OK - maxError is " << this->maxError << ". "; - else - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " FAILED"; -#ifndef HAVE_CUDA - if( Device :: getDevice() == Devices::CudaDevice ) - throw Exceptions::CudaSupportMissing(); -#endif - std::cout << std::endl; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const -{ - if( this->getBenchmarkWasSuccesful() ) - { - String bgColor="#FFFFFF"; - double speedUp = this->getGflops() / csrGflops; - double rgCsrSpeedUp( 0.0 ); - if( this->bestRgCSRGflops ) - rgCsrSpeedUp = this->getGflops() / this->bestRgCSRGflops; - switch( desiredChunkSize ) - { - case 1: bgColor = "#666666"; break; - case 2: bgColor = "#777777"; break; - case 4: bgColor = "#888888"; break; - case 8: bgColor = "#999999"; break; - case 16: bgColor = "#AAAAAA"; break; - case 
32: bgColor = "#BBBBBB"; break; - default: bgColor = "#FFFFFF"; - } - if( writeMatrixInfo ) - { - String baseFileName( inputMtxFile ); - baseFileName += String( ".argcsr-"); - baseFileName += String( desiredChunkSize ); - baseFileName += String( "-" ); - baseFileName += String( cudaBlockSize ); - String matrixPdfFile = baseFileName + String( ".pdf" ); - String matrixHtmlFile = baseFileName + String( ".html" ); - tnlAdaptiveRgCSR< Real > argCsrMatrix( inputMtxFile ); - argCsrMatrix. tuneFormat( this->desiredChunkSize, - this->cudaBlockSize ); - argCsrMatrix. copyFrom( csrMatrix ); - this->printMatrixInHtml( matrixHtmlFile, argCsrMatrix ); - if( rgCsrSpeedUp > 1.0 ) - bgColor=getBgColorByRgCSRSpeedUp( rgCsrSpeedUp ); - logFile << " <td bgcolor=" << bgColor << "> <a href=\"" << matrixPdfFile << "\">PDF</a>, <a href=\"" << matrixHtmlFile << "\">HTML</a></td> " << std::endl; - logFile << " <td bgcolor=" << bgColor << "> " << this->getArtificialZeroElements() << "</td>" << std::endl; - } - - bgColor = this->getBgColorBySpeedUp( speedUp ); - String textColor = "#000000"; //getBgColorByRgCSRSpeedUp( rgCsrSpeedUp ); - logFile << " <td bgcolor=" << bgColor << "><font size=3 color=\"" << textColor << "\"> " << this->getTime() << "</font></td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << "><font size=3 color=\"" << textColor << "\"> " << this->getGflops() << "</font></td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << "><font size=3 color=\"" << textColor << "\"> " << speedUp << "</font></td>" << std::endl; - - } - else - { - if( writeMatrixInfo ) - { - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } -} - -template< typename Real, - typename Device, - typename Index > -void 
tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: setDesiredChunkSize( const Index desiredChunkSize ) -{ - this->desiredChunkSize = desiredChunkSize; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: setCudaBlockSize( const Index cudaBlockSize ) -{ - this->cudaBlockSize = cudaBlockSize; -} - -template< typename Real, - typename Device, - typename Index > -Index tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: getArtificialZeroElements() const -{ - return this->matrix. getArtificialZeroElements(); -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: setBestRgCSRGflops( const double& bestRgCSRGflops ) -{ - this->bestRgCSRGflops = bestRgCSRGflops; -} - -template< typename Real, - typename Device, - typename Index > -String tnlSpmvBenchmarkAdaptiveRgCSR< Real, Device, Index > :: getBgColorByRgCSRSpeedUp( const double& speedUp ) const -{ - if( speedUp >= 30.0 ) - return String( "#009900" ); - if( speedUp >= 25.0 ) - return String( "#00AA00" ); - if( speedUp >= 20.0 ) - return String( "#00BB00" ); - if( speedUp >= 15.0 ) - return String( "#00CC00" ); - if( speedUp >= 10.0 ) - return String( "#00DD00" ); - if( speedUp >= 5.0 ) - return String( "#00EE00" ); - if( speedUp >= 1.0 ) - return String( "#00FF00" ); - return String( "#FFFFFF" ); -} - -#endif /* TNLSPMVBENCHMARKADAPTIVERGCSRMATRIX_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase.h deleted file mode 100644 index 4e5e58078b8929f089e78022c54de6b880a49a8b..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase.h +++ /dev/null @@ -1,129 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmarkBase.h - description - ------------------- - begin : May 15, 2011 - copyright : (C) 2011 by Tomas Oberhuber 
- email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKBASE_H_ -#define TNLSPMVBENCHMARKBASE_H_ - -#include <TNL/Matrices/CSR.h> -#include <TNL/TimerRT.h> -#include <TNL/Math.h> - - -double tnlSpmvBenchmarkPrecision( const double& ) { return 1.0e-12; } -float tnlSpmvBenchmarkPrecision( const float& ) { return 1.0e-4; } - -template< typename Matrix > -class tnlSpmvBenchmarkBase -{ - public: - - tnlSpmvBenchmarkBase(); - - typedef typename Matrix::RealType RealType; - typedef typename Matrix::DeviceType DeviceType; - typedef typename Matrix::IndexType IndexType; - - bool getBenchmarkWasSuccesful() const; - - double getGflops() const; - - double getTime() const; - - void setMaxIterations( const int maxIterations ); - - int getIterations() const; - - IndexType getArtificialZeros() const; - - RealType getMaxError() const; - - void writeProgressTableHeader(); - - virtual bool setup( const CSR< RealType, Devices::Host, IndexType >& matrix ) = 0; - - virtual void tearDown() = 0; - - virtual void writeProgress() const = 0; - - /**** - * This is virtual only the purpose of testing external formats like - * the Hybrid format from the CUSP library. This format is not wrapped - * in Matrix. 
- */ - virtual void runBenchmark( const Vector< RealType, DeviceType, IndexType >& x, - const Vector< RealType, Devices::Host, IndexType >& refB, - bool verbose ); - - virtual void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< RealType, Devices::Host, IndexType >& csrMatrix, - bool writeMatrixInfo ) const = 0; - - protected: - - /**** - * This is helper method for generating HTML table with benchmark results - */ - String getBgColorBySpeedUp( const double& speedUp ) const; - - /**** - * Helper method for writing matrix statistics and information to HTML - */ - bool printMatrixInHtml( const String& fileName, - Matrix< RealType, Devices::Host, IndexType >& matrix ) const; - - - bool benchmarkWasSuccesful; - - bool setupOk; - - double gflops; - - double time; - - /**** - * Max number of SpMV repetitions. - */ - int maxIterations; - - /**** - * Real number of repetitions. - */ - int iterations; - - IndexType artificialZeros; - - RealType maxError; - - IndexType firstErrorOccurence; - - Matrix matrix; - - /**** - * Parameters for the progress table columns - */ - - int formatColumnWidth; - - int timeColumnWidth; - - int iterationsColumnWidth; - - int gflopsColumnWidth; - - int benchmarkStatusColumnWidth; - - int infoColumnWidth; -}; - - -#include "tnlSpmvBenchmarkBase_impl.h" -#endif /* TNLSPMVBENCHMARKBASE_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase_impl.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase_impl.h deleted file mode 100644 index 117fdd89b11cecb3ed5376c8ee53301b8d36288d..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkBase_impl.h +++ /dev/null @@ -1,210 +0,0 @@ -/*************************************************************************** - tnlSpmBenchmarkBase_impl.h - description - ------------------- - begin : Dec 29, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - 
***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKBASE_IMPL_H_ -#define TNLSPMVBENCHMARKBASE_IMPL_H_ - -template< typename Matrix > -tnlSpmvBenchmarkBase< Matrix >::tnlSpmvBenchmarkBase() - : benchmarkWasSuccesful( false ), - setupOk( false ), - gflops( 0.0 ), - time( 0.0 ), - maxIterations( 0 ), - iterations( 0.0 ), - artificialZeros( 0 ), - maxError( 0.0 ), - firstErrorOccurence( 0 ), - formatColumnWidth( 40 ), - timeColumnWidth( 12 ), - iterationsColumnWidth( 15 ), - gflopsColumnWidth( 12 ), - benchmarkStatusColumnWidth( 12 ), - infoColumnWidth( 20 ) -{ -} - -template< typename Matrix > -bool tnlSpmvBenchmarkBase< Matrix >::getBenchmarkWasSuccesful() const -{ - return this->benchmarkWasSuccesful; -} - -template< typename Matrix > -double tnlSpmvBenchmarkBase< Matrix >::getGflops() const -{ - return this->gflops; -} - -template< typename Matrix > -double tnlSpmvBenchmarkBase< Matrix >::getTime() const -{ - return this->time; -} - -template< typename Matrix > -void tnlSpmvBenchmarkBase< Matrix >::setMaxIterations( const int maxIterations ) -{ - this->maxIterations = maxIterations; -} - -template< typename Matrix > -int tnlSpmvBenchmarkBase< Matrix >::getIterations() const -{ - return this->iterations; -} - - -template< typename Matrix > -typename Matrix::IndexType tnlSpmvBenchmarkBase< Matrix >::getArtificialZeros() const -{ - return this->artificialZeros; -} - -template< typename Matrix > -typename Matrix::RealType tnlSpmvBenchmarkBase< Matrix >::getMaxError() const -{ - return this->maxError; -} - -template< typename Matrix > -void tnlSpmvBenchmarkBase< Matrix >::runBenchmark( const Vector< RealType, DeviceType, IndexType >& x, - const Vector< RealType, Devices::Host, IndexType >& refB, - bool verbose ) -{ - benchmarkWasSuccesful = false; - if( ! 
setupOk ) - return; -#ifndef HAVE_CUDA - if( DeviceType::getDevice() == Devices::CudaDevice ) - { - if( verbose ) - writeProgress(); - return; - } -#endif - - Vector< RealType, DeviceType, IndexType > b( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); - if( ! b. setSize( refB. getSize() ) ) - return; - - iterations = 0; - - TimerRT rt_timer; - rt_timer. Reset(); - //maxIterations = 1; - - for( int i = 0; i < maxIterations; i ++ ) - { - matrix. vectorProduct( x, b ); - iterations ++; - } - - this->time = rt_timer. getTime(); - - firstErrorOccurence = 0; - Vector< RealType, Devices::Host, IndexType > resB( "tnlSpmvBenchmark< Real, Device, Index, Matrix > :: runBenchmark : b" ); - if( ! resB. setSize( b. getSize() ) ) - { - std::cerr << "I am not able to allocate copy of vector b on the host." << std::endl; - return; - } - resB = b; - benchmarkWasSuccesful = true; - for( IndexType j = 0; j < refB. getSize(); j ++ ) - { - //f << refB[ j ] << " - " << host_b[ j ] << " = " << refB[ j ] - host_b[ j ] << std::endl; - RealType error( 0.0 ); - if( refB[ j ] != 0.0 ) - error = ( RealType ) fabs( refB[ j ] - resB[ j ] ) / ( RealType ) fabs( refB[ j ] ); - else - error = ( RealType ) fabs( refB[ j ] ); - if( error > maxError ) - firstErrorOccurence = j; - this->maxError = max( this->maxError, error ); - - /*if( error > tnlSpmvBenchmarkPrecision( error ) ) - benchmarkWasSuccesful = false;*/ - - } - //cout << "First error was on " << firstErrorOccurence << std::endl; - - double flops = 2.0 * iterations * matrix.getNumberOfNonzeroMatrixElements(); - this->gflops = flops / time * 1.0e-9; - artificialZeros = matrix.getNumberOfMatrixElements() - matrix.getNumberOfNonzeroMatrixElements(); - - if( verbose ) - writeProgress(); -} - -template< typename Matrix > -void tnlSpmvBenchmarkBase< Matrix >::writeProgressTableHeader() -{ - int totalWidth = this->formatColumnWidth + - this->timeColumnWidth + - this->iterationsColumnWidth + - this->gflopsColumnWidth + - 
this->benchmarkStatusColumnWidth + - this->infoColumnWidth; - - std::cout << left << std::setw( this->formatColumnWidth - 5 ) << "MATRIX FORMAT" - << left << std::setw( 5 ) << "BLOCK" - << right << std::setw( this->timeColumnWidth ) << "TIME" - << right << std::setw( this->iterationsColumnWidth ) << "ITERATIONS" - << right << std::setw( this->gflopsColumnWidth ) << "GFLOPS" - << right << std::setw( this->benchmarkStatusColumnWidth ) << "CHECK" - << left << std::setw( this->infoColumnWidth ) << " INFO" << std::endl - << setfill( '-' ) << std::setw( totalWidth ) << "--" << std::endl - << setfill( ' '); -} - -template< typename Matrix > -String tnlSpmvBenchmarkBase< Matrix > :: getBgColorBySpeedUp( const double& speedUp ) const -{ - if( speedUp >= 30.0 ) - return String( "#FF9900" ); - if( speedUp >= 25.0 ) - return String( "#FFAA00" ); - if( speedUp >= 20.0 ) - return String( "#FFBB00" ); - if( speedUp >= 15.0 ) - return String( "#FFCC00" ); - if( speedUp >= 10.0 ) - return String( "#FFDD00" ); - if( speedUp >= 5.0 ) - return String( "#FFEE00" ); - if( speedUp >= 1.0 ) - return String( "#FFFF00" ); - return String( "#FFFFFF" ); -} - - -template< typename Matrix > -bool tnlSpmvBenchmarkBase< Matrix > :: printMatrixInHtml( const String& fileName, - Matrix< RealType, Devices::Host, IndexType >& matrix ) const -{ - //cout << "Writing to file " << fileName << std::endl; - std::fstream file; - file. open( fileName. getString(), std::ios::out ); - if( ! file ) - { - std::cerr << "I am not able to open the file " << fileName << std::endl; - return false; - } - file << "<html>" << std::endl; - file << " <body>" << std::endl; - matrix. printOut( file, "html" ); - file << " </body>" << std::endl; - file << "</html>" << std::endl; - file. 
close(); - return true; -} - -#endif /* TNLSPMVBENCHMARKBASE_IMPL_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCSRMatrix.h deleted file mode 100644 index 829284d6ebd52ffcbffca9546b830d2e081c45d5..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCSRMatrix.h +++ /dev/null @@ -1,119 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmarkCSR.h - description - ------------------- - begin : May 15, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKCSRMATRIX_H_ -#define TNLSPMVBENCHMARKCSRMATRIX_H_ - -#include "tnlSpmvBenchmark.h" -#include <TNL/Matrices/CSR.h> - -template< typename Real, typename Index> -class tnlSpmvBenchmarkCSR : public tnlSpmvBenchmark< Real, Devices::Host, Index, CSR > -{ - public: - - bool setup( const CSR< Real, Devices::Host, Index >& matrix ); - - void tearDown(); - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const; - Real getForwardBackwardDifference() const; - - protected: - - /*!*** - * This measures the difference between SpMV result when used forward or bakward - * matrix columns ordering. - */ - Real forwardBackwardDifference; -}; - -template< typename Real, typename Index> -bool tnlSpmvBenchmarkCSR< Real, Index > :: setup( const CSR< Real, Devices::Host, Index >& matrix ) -{ - this->matrix = matrix; - - const Index size = matrix. getSize(); - Vector< Real, Devices::Host > refX( "ref-x", size ), refB( "ref-b", size), backwardRefB( "backwardRef-b", size); - refX. setValue( 1.0 ); - this->matrix. 
vectorProduct( refX, refB ); - this->matrix. setBackwardSpMV( true ); - this->matrix. vectorProduct( refX, backwardRefB ); - this->matrix. setBackwardSpMV( false ); - Real error( 0.0 ), maxError( 0.0 ); - for( Index j = 0; j < refB. getSize(); j ++ ) - { - if( refB[ j ] != 0.0 && backwardRefB[ j ] != 0.0 ) - error = ( Real ) fabs( refB[ j ] - backwardRefB[ j ] ) / min( ( Real ) fabs( refB[ j ] ), ( Real ) fabs( backwardRefB[ j ] ) ); - else - error = max( ( Real ) fabs( refB[ j ] ), ( Real ) fabs( backwardRefB[ j ] ) ); - maxError = max( error, maxError ); - } - forwardBackwardDifference = maxError; - this->setupOk = true; - return true; -} - -template< typename Real, typename Index> -void tnlSpmvBenchmarkCSR< Real, Index > :: tearDown() -{ - this->matrix. setSize( 0 ); -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkCSR< Real, Index > :: writeProgress() const -{ - std::cout << left << std::setw( this->formatColumnWidth ) << "CSR"; - // std::cout << left << std::setw( 25 ) << matrixFormat << std::setw( 5 ) << cudaBlockSize; - std::cout << right << std::setw( this->timeColumnWidth ) << std::setprecision( 2 ) << this->getTime() - << right << std::setw( this->iterationsColumnWidth ) << this->getIterations() - << right << std::setw( this->gflopsColumnWidth ) << std::setprecision( 2 ) << this->getGflops(); - if( this->getBenchmarkWasSuccesful() ) - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " OK - SpMV diff. 
" << getForwardBackwardDifference(); - else - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " FAILED "; - std::cout << std::endl; -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkCSR< Real, Index > :: writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const -{ - if( this->getBenchmarkWasSuccesful() ) - { - logFile << " <td> " << this->getTime() << "</font></td>" << std::endl; - logFile << " <td> " << this->getGflops() << "</td>" << std::endl; - } - else - { - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } -} - -template< typename Real, - typename Index > -Real tnlSpmvBenchmarkCSR< Real, Index > :: getForwardBackwardDifference() const -{ - return forwardBackwardDifference; -} - -#endif /* TNLSPMVBENCHMARKCSRMATRIX_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCusparseCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCusparseCSRMatrix.h deleted file mode 100644 index 582cc5b9def78301525a8b34929cfc33f2fa210e..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkCusparseCSRMatrix.h +++ /dev/null @@ -1,115 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmarkCusparseCSR.h - description - ------------------- - begin : Feb 16, 2012 - copyright : (C) 2012 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKCUSPARSECSRMATRIX_H_ -#define TNLSPMVBENCHMARKCUSPARSECSRMATRIX_H_ - -#include "tnlSpmvBenchmark.h" -#include <TNL/tnlConfig.h> -#include <TNL/legacy/matrices/tnlCusparseCSR.h> - -template< typename Real, typename Index> -class tnlSpmvBenchmarkCusparseCSR : public 
tnlSpmvBenchmark< Real, Devices::Cuda, Index, tnlCusparseCSR > -{ - public: - tnlSpmvBenchmarkCusparseCSR(); - - bool setup( const CSR< Real, Devices::Host, Index >& matrix ); - - void tearDown(); - - Index getArtificialZeros() const; - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const; - - void setNonzeroElements( const Index nonzeroElements ); -}; - -template< typename Real, typename Index> -bool tnlSpmvBenchmarkCusparseCSR< Real, Index > :: setup( const CSR< Real, Devices::Host, Index >& matrix ) -{ - if( ! this->matrix. copyFrom( matrix ) ) - return false; - this->setupOk = true; - return true; -} - -template< typename Real, - typename Index> -void tnlSpmvBenchmarkCusparseCSR< Real, Index > :: tearDown() -{ - this->matrix. reset(); -} - -template< typename Real, - typename Index> -Index tnlSpmvBenchmarkCusparseCSR< Real, Index > :: getArtificialZeros() const -{ - return 0; -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkCusparseCSR< Real, Index > :: writeProgress() const -{ - std::cout << left << std::setw( this->formatColumnWidth ) << "Cusparse"; - // std::cout << left << std::setw( 25 ) << matrixFormat << std::setw( 5 ) << cudaBlockSize; - std::cout << right << std::setw( this->timeColumnWidth ) << std::setprecision( 2 ) << this->getTime() - << right << std::setw( this->iterationsColumnWidth ) << this->getIterations() - << right << std::setw( this->gflopsColumnWidth ) << std::setprecision( 2 ) << this->getGflops(); - if( this->getBenchmarkWasSuccesful() ) - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << "OK "; - else - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " FAILED - maxError is " << this->maxError << ". 
"; -#ifndef HAVE_CUSP - std::cout << "CUSPARSE library is missing."; -#endif - std::cout << std::endl; -} - -template< typename Real, - typename Index > -tnlSpmvBenchmarkCusparseCSR< Real, Index > :: tnlSpmvBenchmarkCusparseCSR() -{ - -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkCusparseCSR< Real, Index > :: writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const -{ - if( this->getBenchmarkWasSuccesful() ) - { - double speedUp = this->getGflops() / csrGflops; - String bgColor = this->getBgColorBySpeedUp( speedUp ); - logFile << " <td bgcolor=" << bgColor << ">" << this->getTime() << "</td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << ">" << this->getGflops() << "</td>" << std::endl; - - logFile << " <td bgcolor=" << bgColor << "> " << speedUp << "</td>" << std::endl; - } - else - { - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } -} - -#endif /* TNLSPMVBENCHMARKCUSPARSECSRMATRIX_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkHybridMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkHybridMatrix.h deleted file mode 100644 index 71694e0f561aef3b87feda04fb24ce41487d92f3..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkHybridMatrix.h +++ /dev/null @@ -1,200 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmarkHybridMatrix.h - description - ------------------- - begin : May 15, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKHYBRIDMATRIX_H_ -#define 
TNLSPMVBENCHMARKHYBRIDMATRIX_H_ - -#include "tnlSpmvBenchmark.h" -#include <TNL/tnlConfig.h> -#ifdef HAVE_CUSP - #include <cusp/hyb_matrix.h> - #include <cusp/io/matrix_market.h> - #include <cusp/multiply.h> - #include <cusp/print.h> -#endif - - -template< typename Real, typename Index> -class tnlSpmvBenchmarkHybridMatrix : public tnlSpmvBenchmark< Real, Devices::Host, Index, CSR > -{ - public: - - void setFileName( const String& fileName ); - - bool setup( const CSR< Real, Devices::Host, Index >& matrix ); - - void tearDown(); - - void runBenchmark( const Vector< Real, Devices::Host, Index >& x, - const Vector< Real, Devices::Host, Index >& refB, - bool verbose ); - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const; - - void setNonzeroElements( const Index nonzeroElements ); - - protected: - - String fileName; - - Index nonzeroElements; -}; - -template< typename Real, typename Index> -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: setFileName( const String& fileName ) -{ - this->fileName = fileName; -} - -template< typename Real, typename Index> -bool tnlSpmvBenchmarkHybridMatrix< Real, Index > :: setup( const CSR< Real, Devices::Host, Index >& matrix ) -{ - return true; -} - -template< typename Real, - typename Index> -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: tearDown() -{ - -} - -template< typename Real, - typename Index> -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: runBenchmark( const Vector< Real, Devices::Host, Index >& _x, - const Vector< Real, Devices::Host, Index >& refB, - bool verbose ) -{ - this->benchmarkWasSuccesful = false; -#ifdef HAVE_CUSP - try - { - // create an empty sparse matrix structure (HYB format) - cusp::hyb_matrix< Index, Real, cusp::device_memory > A; - - // load a matrix stored in MatrixMarket format - cusp::io::read_matrix_market_file( A, 
this->fileName. getString() ); - - // allocate storage for solution (x) and right hand side (b) - cusp::array1d< Real, cusp::host_memory > host_x( A.num_rows, 1 ); - cusp::array1d< Real, cusp::device_memory > x( A.num_rows, 1 ); - cusp::array1d< Real, cusp::device_memory > b( A.num_rows, 0 ); - - for( Index j = 0; j < refB. getSize(); j ++ ) - host_x[ j ] = _x[ j ]; - - x = host_x; - - TimerRT rt_timer; - rt_timer. Reset(); - - this->iterations = 0; - //while( rt_timer. getTime() < time ) - { - for( int i = 0; i < this->maxIterations; i ++ ) - { - cusp :: multiply( A, x, b ); - cudaThreadSynchronize(); - this->iterations ++; - } - } - this->time = rt_timer. getTime(); - - cusp::array1d< Real, cusp::host_memory > host_b( b ); - host_b = b; - - for( Index j = 0; j < refB. getSize(); j ++ ) - { - //f << refB[ j ] << " - " << host_b[ j ] << " = " << refB[ j ] - host_b[ j ] << std::endl; - if( refB[ j ] != 0.0 ) - this->maxError = max( this->maxError, ( Real ) fabs( refB[ j ] - host_b[ j ] ) / ( Real ) fabs( refB[ j ] ) ); - else - this->maxError = max( this->maxError, ( Real ) fabs( refB[ j ] ) ); - } - //if( this->maxError < 1.0 ) - this->benchmarkWasSuccesful = true; - //else - // this->benchmarkWasSuccesful = false; - - - double flops = 2.0 * this->iterations * this->nonzeroElements; - this->gflops = flops / this->time * 1.0e-9; - - } - catch( std::bad_alloc ) - { - writeProgress(); - return; - } -#else - this->benchmarkWasSuccesful = false; -#endif - writeProgress(); -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: writeProgress() const -{ - std::cout << left << std::setw( this->formatColumnWidth ) << "Hybrid"; - // std::cout << left << std::setw( 25 ) << matrixFormat << std::setw( 5 ) << cudaBlockSize; - std::cout << right << std::setw( this->timeColumnWidth ) << std::setprecision( 2 ) << this->getTime() - << right << std::setw( this->iterationsColumnWidth ) << this->getIterations() - << right << std::setw( 
this->gflopsColumnWidth ) << std::setprecision( 2 ) << this->getGflops(); - if( this->getBenchmarkWasSuccesful() ) - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << "OK "; - else - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " FAILED - maxError is " << this->maxError << ". "; -#ifndef HAVE_CUSP - std::cout << "CUSP library is missing."; -#endif - std::cout << std::endl; -} - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const -{ - if( this->getBenchmarkWasSuccesful() ) - { - double speedUp = this->getGflops() / csrGflops; - String bgColor = this->getBgColorBySpeedUp( speedUp ); - logFile << " <td bgcolor=" << bgColor << ">" << this->getTime() << "</td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << ">" << this->getGflops() << "</td>" << std::endl; - - logFile << " <td bgcolor=" << bgColor << "> " << speedUp << "</td>" << std::endl; - } - else - { - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } -} - - -template< typename Real, - typename Index > -void tnlSpmvBenchmarkHybridMatrix< Real, Index > :: setNonzeroElements( const Index nonzeroElements ) -{ - this->nonzeroElements = nonzeroElements; -} - -#endif /* TNLSPMVBENCHMARKHYBRIDMATRIX_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h deleted file mode 100644 index 6327ac95d659ebc6592d458f27288a11cf34d141..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmarkRgCSRMatrix.h +++ /dev/null @@ -1,237 +0,0 @@ - 
/*************************************************************************** - tnlSpmvBenchmarkRgCSR.h - description - ------------------- - begin : May 15, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARKRGCSRMATRIX_H_ -#define TNLSPMVBENCHMARKRGCSRMATRIX_H_ - -#include "tnlSpmvBenchmark.h" - -#include <TNL/Exceptions/CudaSupportMissing.h> - -template< typename Real, typename Device, typename Index> -class tnlSpmvBenchmarkRgCSR : public tnlSpmvBenchmark< Real, Device, Index, tnlRgCSR > -{ - public: - - tnlSpmvBenchmarkRgCSR(); - - bool setup( const CSR< Real, Devices::Host, Index >& matrix ); - - void tearDown(); - - void writeProgress() const; - - void writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const; - - void setGroupSize( const Index groupSize ); - - void setCudaBlockSize( const Index cudaBlockSize ); - - void setUseAdaptiveGroupSize( bool useAdaptiveGroupSize ); - - void setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategy adaptiveGroupSizeStrategy ); - - Index getArtificialZeroElements() const; - - protected: - - Index groupSize; - - Index cudaBlockSize; - - bool useAdaptiveGroupSize; - - tnlAdaptiveGroupSizeStrategy adaptiveGroupSizeStrategy; -}; - -template< typename Real, - typename Device, - typename Index> -tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: tnlSpmvBenchmarkRgCSR() - : groupSize( 0 ), - cudaBlockSize( 0 ), - useAdaptiveGroupSize( false ), - adaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategyByAverageRowSize ) -{ -} - -template< typename Real, - typename Device, - typename Index> -bool tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setup( const CSR< Real, Devices::Host, Index >& csrMatrix ) -{ - TNL_ASSERT( 
this->groupSize > 0, std::cerr << "groupSize = " << this->groupSize ); - if( Device :: getDevice() == Devices::HostDevice ) - { - this->matrix. tuneFormat( groupSize, - this->useAdaptiveGroupSize, - this->adaptiveGroupSizeStrategy ); - if( ! this->matrix. copyFrom( csrMatrix ) ) - return false; - } - if( Device :: getDevice() == Devices::CudaDevice ) - { -#ifdef HAVE_CUDA - tnlRgCSR< Real, Devices::Host, Index > hostMatrix( "tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setup : hostMatrix" ); - hostMatrix. tuneFormat( groupSize, - this->useAdaptiveGroupSize, - this->adaptiveGroupSizeStrategy ); - hostMatrix. copyFrom( csrMatrix ); - if( ! this->matrix. copyFrom( hostMatrix ) ) - return false; -#else - return false; -#endif - } - this->setupOk = true; - return true; -} - -template< typename Real, - typename Device, - typename Index> -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: tearDown() -{ - this->matrix. reset(); -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: writeProgress() const -{ - std::cout << left << std::setw( this->formatColumnWidth - 15 ) << "Row-grouped CSR "; - if( Device :: getDevice() == Devices::CudaDevice ) - { - if( useAdaptiveGroupSize ) - std::cout << std::setw( 5 ) << "Var."; - else - std::cout << std::setw( 5 ) << this->groupSize; - std::cout << std::setw( 10 ) << this->cudaBlockSize; - } - else - { - if( useAdaptiveGroupSize ) - std::cout << std::setw( 15 ) << "Var."; - else - std::cout << std::setw( 15 ) << this->groupSize; - } - std::cout << right << std::setw( this->timeColumnWidth ) << std::setprecision( 2 ) << this->getTime() - << right << std::setw( this->iterationsColumnWidth ) << this->getIterations() - << right << std::setw( this->gflopsColumnWidth ) << std::setprecision( 2 ) << this->getGflops(); - if( this->getBenchmarkWasSuccesful() ) - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " OK - maxError is " << this->maxError 
<< ". "; - else - std::cout << right << std::setw( this->benchmarkStatusColumnWidth ) << " FAILED - maxError is " << this->maxError << ". "; -#ifndef HAVE_CUDA - if( Device :: getDevice() == Devices::CudaDevice ) - throw Exceptions::CudaSupportMissing(); -#endif - std::cout << std::endl; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setGroupSize( const Index groupSize ) -{ - this->groupSize = groupSize; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setCudaBlockSize( const Index cudaBlockSize ) -{ - this->matrix. setCUDABlockSize( cudaBlockSize ); - this->cudaBlockSize = cudaBlockSize; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setUseAdaptiveGroupSize( bool useAdaptiveGroupSize ) -{ - this->useAdaptiveGroupSize = useAdaptiveGroupSize; -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: setAdaptiveGroupSizeStrategy( tnlAdaptiveGroupSizeStrategy adaptiveGroupSizeStrategy ) -{ - this->adaptiveGroupSizeStrategy = adaptiveGroupSizeStrategy; -} - -template< typename Real, - typename Device, - typename Index > -Index tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: getArtificialZeroElements() const -{ - return this->matrix. 
getArtificialZeroElements(); -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmarkRgCSR< Real, Device, Index > :: writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< Real, Devices::Host, Index >& csrMatrix, - bool writeMatrixInfo ) const -{ - String bgColor; - switch( groupSize ) - { - case 16: bgColor = "#5555FF"; break; - case 32: bgColor = "#9999FF"; break; - case 64: bgColor = "#CCCCFF"; break; - default: bgColor = "#FFFFFF"; - } - if( writeMatrixInfo ) - { - String baseFileName( inputMtxFile ); - baseFileName += String( ".rgcsr-"); - baseFileName += String( groupSize ); - String matrixPdfFile( baseFileName ); - matrixPdfFile += String( ".pdf" ); - String matrixHtmlFile( baseFileName ); - matrixHtmlFile += String( ".html" ); - tnlRgCSR< Real > rgCsrMatrix( inputMtxFile ); - rgCsrMatrix. tuneFormat( this->groupSize, - this->useAdaptiveGroupSize, - this->adaptiveGroupSizeStrategy ); - rgCsrMatrix. 
copyFrom( csrMatrix ); - this->printMatrixInHtml( matrixHtmlFile, rgCsrMatrix ); - logFile << " <td bgcolor=" << bgColor << "> <a href=\"" << matrixPdfFile << "\">PDF</a>,<a href=\"" << matrixHtmlFile << "\"> HTML</a></td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << "> " << this->getArtificialZeroElements() << "</td>" << std::endl; - } - if( this->getBenchmarkWasSuccesful() ) - { - const double speedUp = this->getGflops() / csrGflops; - bgColor = this->getBgColorBySpeedUp( speedUp ); - logFile << " <td bgcolor=" << bgColor << ">" << this->getTime() << "</td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << "> " << this->getGflops() << "</td>" << std::endl; - logFile << " <td bgcolor=" << bgColor << "> " << speedUp << "</td>" << std::endl; - } - else - { - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - logFile << " <td bgcolor=#FF0000> N/A </td>" << std::endl; - } -} - - -#endif /* TNLSPMVBENCHMARKRGCSRMATRIX_H_ */ diff --git a/src/TNL/legacy/benchmarks/tnlSpmvBenchmark_impl.h b/src/TNL/legacy/benchmarks/tnlSpmvBenchmark_impl.h deleted file mode 100644 index 24bc0dca0ce5c0fe82018d29d8790a10c3bdf63c..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/benchmarks/tnlSpmvBenchmark_impl.h +++ /dev/null @@ -1,49 +0,0 @@ -/*************************************************************************** - tnlSpmvBenchmark_impl.h - description - ------------------- - begin : Dec 29, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLSPMVBENCHMARK_IMPL_H_ -#define TNLSPMVBENCHMARK_IMPL_H_ - -template< typename Real, - typename Device, - typename Index > -bool tnlSpmvBenchmark< CSR< Real, Device, Index > >::setup( const CSR< RealType, Devices::Host, IndexType >& matrix ) -{ - this->matrix = matrix; -} - 
-template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmark< CSR< Real, Device, Index > >::tearDown() -{ - this->matrix.reset(); -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmark< CSR< Real, Device, Index > >::writeProgress() const -{ -} - -template< typename Real, - typename Device, - typename Index > -void tnlSpmvBenchmark< CSR< Real, Device, Index > >::writeToLogTable( std::ostream& logFile, - const double& csrGflops, - const String& inputMtxFile, - const CSR< RealType, Devices::Host, IndexType >& csrMatrix, - bool writeMatrixInfo ) const -{ - -} - -#endif /* TNLSPMVBENCHMARK_IMPL_H_ */ diff --git a/src/TNL/legacy/curve/Curve.h b/src/TNL/legacy/curve/Curve.h deleted file mode 100644 index f6f0408db82016340bbce152de4fdeb3111ab4f8..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/curve/Curve.h +++ /dev/null @@ -1,223 +0,0 @@ -/*************************************************************************** - Curve.h - description - ------------------- - begin : 2007/06/27 - copyright : (C) 2007 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <iomanip> -#include <fstream> -#include <cstring> -#include <TNL/Containers/List.h> -#include <TNL/Object.h> -#include <TNL/Math.h> -#include <TNL/Containers/StaticVector.h> -#include <TNL/param-types.h> - -namespace TNL { - -//! Basic structure for curves -template< class T > -class CurveElement -{ - public: - CurveElement() {}; - - CurveElement( const T& pos, - bool _speparator = false ) - : position( pos ), - separator( _speparator ) {}; - - bool save( File& file ) const - { - if( ! file. write( &position ) ) - return false; - if( ! file. write( &separator ) ) - return false; - return true; - }; - - bool load( File& file ) - { - if( ! file. read( &position ) ) - return false; - if( ! 
file. read( &separator ) ) - return false; - return true; - }; - - T position; - - bool separator; -}; - -template< class T > -class Curve - : public Object, - public Containers::List< CurveElement< T > > -{ - public: - //! Basic contructor - Curve( const char* name ) - : Object() -// FIXME: name property has been removed from Object -// : Object( name ) - { - }; - - //! Destructor - ~Curve() - { }; - - String getType() const - { - return String( "Curve< " ) + String( TNL::getType< T >() ) + String( " >" ); - }; - - //! Append new point - void Append( const T& vec, bool separator = false ) - { - Containers::List< CurveElement< T > > :: Append( CurveElement< T >( vec, separator ) ); - }; - - //! Erase the curve - void Erase() - { - Containers::List< CurveElement< T > >::reset(); - }; - - //! Method for saving the object to a file as a binary data - bool save( File& file ) const - { - if( ! Object :: save( file ) ) return false; - if( ! Containers::List< CurveElement< T > > :: DeepSave( file ) ) return false; - return true; - }; - - //! Method for restoring the object from a file - bool load( File& file ) - { - if( ! Object :: load( file ) ) return false; - if( ! Containers::List< CurveElement< T > > :: DeepLoad( file ) ) return false; - return true; - }; - - //! Method for saving the object to a file as a binary data - bool save( const String& fileName ) const - { - return Object :: save( fileName ); - }; - - //! Method for restoring the object from a file - bool load( const String& fileName ) - { - return Object :: load( fileName ); - }; - -}; - -template< class T > bool Write( const Curve< T >& curve, - std::ostream& str, - const char* format, - const int step = 1 ) -{ - if( ! format ) - { - std::cerr << "No format given for drawing 2D grid. " << std::endl; - return false; - } - if( curve. isEmpty() ) - { - std::cerr << "Unable to draw curve, it's empty!" << std::endl; - return false; - } - if( strcmp( format, "gnuplot" ) == 0 ) - { - const int size = curve. 
getSize(); - int i, j; - for( i = 0; i < size; i += step ) - { - if( curve[ i ]. separator ) - str << std::endl; - else - str << std::setprecision( 12 ) - << curve[ i ]. position[ 0 ] << " " - << curve[ i ]. position[ 1 ] << std::endl; - for( j = 0; j < step; j ++ ) - if( curve[ i + j ]. separator ) str << std::endl; - } - return true; - } - std::cerr << "Unknown format '" << format << "' for drawing a curve." << std::endl; - return false; -}; - -template< class T > bool Write( const Curve< T >& curve, - const char* file_name, - const char* format, - const int step = 1 ) -{ - - if( strncmp( format, "tnl",3 ) == 0 ) - { - File file; - if( ! file. open( String( file_name ) + String( ".tnl" ), IOMode::write ) ) - { - std::cerr << "I am not able to open the file " << file_name << " for drawing curve." << std::endl; - return false; - } - if( ! curve. save( file ) ) - { - std::cerr << "I am not able to write to the file " << file_name << " for drawing grid." << std::endl; - return false; - } - file. close(); - } - else - { - std::fstream file; - file. open( file_name, std::ios::out ); - if( ! file ) - { - std::cerr << "I am not able to to open the file " << file_name << " for drawing curve." << std::endl; - return false; - } - bool result = Write( curve, file, format, step ); - file. close(); - if( ! result ) - { - std::cerr << "Sorry I could not write to the file " << file_name << std::endl; - return false; - } - } - return true; -}; - -template< class T > bool Read( Curve< T >& crv, - const char* input_file ) -{ - File file; - if( ! file. open( String( input_file ), IOMode::read ) ) - { - std::cout << " unable to open file " << input_file << std::endl; - return false; - } - if( ! crv. load( file ) ) - { - std::cout << " unable to restore the data " << std::endl; - return false; - } - file. 
close(); - return true; -} - -// Explicit instatiation -template class Curve< Containers::StaticVector< 2, double > >; - -} // namespace TNL - diff --git a/src/TNL/legacy/curve/tnlcurve2gnuplot.cpp b/src/TNL/legacy/curve/tnlcurve2gnuplot.cpp deleted file mode 100644 index 5a1e297d60973b8a1a6f8f7f053732cdcce9fb0c..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/curve/tnlcurve2gnuplot.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/*************************************************************************** - tnlCurve2gnuplot.cpp - description - ------------------- - begin : 2007/12/16 - copyright : (C) 2007 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include <TNL/Config/ParameterContainer.h> -#include <TNL/Curve.h> -#include <TNL/Containers/StaticVector.h> - -using namespace TNL; - -void setupConfig( Config::ConfigDescription& config ) -{ - config.addDelimiter ( "General settings:" ); - config.addRequiredList< String >( "input-files", "Input files." ); - config.addList< String >( "output-files", "Output files." ); - config.addEntry< int >( "output-step", "Decrease number of the output curve nodes." ); - config.addEntry< String >( "output-file-format", "Output file format. Can be gnuplot.", "gnuplot" ); -} - -//-------------------------------------------------------------------------- -int main( int argc, char* argv[] ) -{ - Config::ParameterContainer parameters; - Config::ConfigDescription conf_desc; - - setupConfig( conf_desc ); - if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) - { - conf_desc.printUsage( argv[ 0 ] ); - return 1; - } - - Containers::List< String > input_files = parameters. getParameter< Containers::List< String > >( "input-files" ); - Containers::List< String > output_files; - if( ! parameters. 
getParameter< Containers::List< String > >( "output-files", output_files ) ) - std::cout << "No output files were given." << std::endl; - int output_step( 1 ); - parameters. getParameter< int >( "output-step", output_step ); - String output_file_format = parameters. getParameter< String >( "output-file-format" ); - - int size = input_files. getSize(); - /*if( size != output_files. getSize() ) - { - std::cerr << "Sorry, there is different number of input and output files." << std::endl; - return 1; - }*/ - int i; - Curve< Containers::StaticVector< 2, double > > crv( "tnlcurve2gnuplot:curve" ); - for( i = 0; i < size; i ++ ) - { - const char* input_file = input_files[ i ]. getString(); - std::cout << "Processing file " << input_file << " ... " << std::flush; - - File file; - if( ! file. open( input_files[ i ], IOMode::read ) ) - { - std::cout << " unable to open file " << input_files[ i ] << std::endl; - continue; - } - if( ! crv. load( file ) ) - { - std::cout << " unable to restore the data " << std::endl; - continue; - } - file. close(); - - Curve< Containers::StaticVector< 2, double > > out_crv( "tnlcurve2gnuplot:outcurve" ); - const int size = crv. getSize(); - int i; - for( i = 0; i < size; i += output_step ) - { - out_crv. Append( crv[ i ]. position, crv[ i ]. separator ); - //StaticVector< 2, double > v = crv[ i ]. position; - //v[ 0 ] = u( i ); - //v[ 1 ] = u( i + 1 ); - //out_crv. Append( v ); - } - - String output_file_name; - if( ! output_files. isEmpty() ) output_file_name = output_files[ i ]; - else - { - if( output_file_format == "gnuplot" ) - output_file_name += ".gplt"; - } - std::cout << " writing... " << output_file_name << std::endl; - if( ! Write( out_crv, output_file_name. getString(), output_file_format. 
getString() ) ) - { - std::cerr << " unable to write to " << output_file_name << std::endl; - } - } -} diff --git a/src/TNL/legacy/incompressible-navier-stokes/CMakeLists.txt b/src/TNL/legacy/incompressible-navier-stokes/CMakeLists.txt deleted file mode 100755 index 1cf94862f06974476a4a48ab83fccb97b269a27d..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -set( tnl_incompressible_navier_stokes_SOURCES - tnl-incompressible-navier-stokes.cpp -tnlExplicitINSTimeStepper_impl.h -tnlExplicitINSTimeStepper.h -tnlIncompressibleNavierStokesProblem_impl.h -tnlIncompressibleNavierStokesProblem.h -tnlNSFastBuildConfig.h -visit_writer.h -visit_writer.cpp -solver.h -base.h - ) - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(tnl-incompressible-navier-stokes${debugExt} tnl-incompressible-navier-stokes.cu) - -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE(tnl-incompressible-navier-stokes${debugExt} ${tnl_incompressible_navier_stokes_SOURCES}) - -ENDIF( BUILD_CUDA ) - -target_link_libraries (tnl-incompressible-navier-stokes${debugExt} tnl${debugExt}-${tnlVersion} ) - -INSTALL( TARGETS tnl-incompressible-navier-stokes${debugExt} - RUNTIME DESTINATION bin - PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) - -#INSTALL( FILES tnl-run-incompressible-navier-stokes -# DESTINATION share/tnl-${tnlVersion}/examples/incompressible-navier-stokes ) diff --git a/src/TNL/legacy/incompressible-navier-stokes/base.h b/src/TNL/legacy/incompressible-navier-stokes/base.h deleted file mode 100644 index 725d275f6b5a96c79fb8a2164f80ee6b443db62e..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/base.h +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef __BASE_H__ -#define __BASE_H__ -#define USE_CUDA 0 - -#include <cmath> -#include "base_structs.h" - -inline bool Equal(double a, double b) -{ - a = abs(a); b = abs(b); - double bigger = (a> b)?a: b; - if 
(bigger < 1e-8) return true; - double diff = a-b; - if (diff < 0) diff *=-1; - if (diff < 0.0000001*bigger || diff < 0.0000001) return true; - - if (std::isnan((double) a) && std::isnan((double) b)) return true; - if (std::isinf(a) && std::isinf(b)) return true; - return false; -} - -bool CheckResults(ArrayD &a, ArrayD &b) -{ - if (a.size() != b.size()) {printf("Array sizes don't match\n"); return false;} - int count = 0; - ArrayD aa, ba; - ArrayBaseD ac=a, bc=b; - if (a._onGPU) {aa.clone(a,false,false); ac=aa;} - if (b._onGPU) {ba.clone(b,false,false); bc=ba;} - for (int i = 0 ; i < ac.size(); i++) - if (!Equal(ac[i], bc[i])) - { - printf("Chyba na %d mezi %e %e dela %f\n", i, ac[i], bc[i], abs(ac[i]-bc[i])/std::max(ac[i], bc[i])); - if (count++ > 20) return false; - } - if (count==0) printf("Shodne\n"); - return count==0; -} - -template <class MatrixType> void Mult(const MatrixType &A, const double * vec, double *out) -{ - //if (vec.size()< A.num_cols()) throw "CSRMatrix::mult - vec vector not big enough"; - - int N = A.num_rows(); - #pragma omp parallel for schedule(static) - for (int ri = 0; ri < N; ri++) - { - out[ri] = 0; - for (int i = A.num_in_row(ri)-1; i>=0; i--) - { - double val=0; int col=0; - A.get_el_in_row(ri,i,val,col); - out[ri] += val * vec[col]; - } - } -} - -template <class MatrixType> -__cuda_call__ void JacobiIterKernel(const MatrixType &A, const ArrayBaseD &b, const ArrayBaseD & x, ArrayBaseD & out_x, const double damping, int r) -{ - double diag = 1; - double nonDiag = 0; - - for (int i = 0; i < A.num_in_row(r); i++) - { - double aVal; int c; - A.get_el_in_row(r,i,aVal,c); - - if (c==r) diag = aVal; - else nonDiag += aVal * x[c]; - } - out_x[r] = (1.0 - damping)*x[r] + damping*(b[r] - nonDiag)/diag; -} - -#if USE_CUDA -template <class MatrixType> -__global__ void JacobiIterGPU(const MatrixType A, const ArrayBaseD b, const ArrayBaseD x, ArrayBaseD out_x, const double damping) -{ - int r = blockIdx.x*blockDim.x + threadIdx.x; - if (r < 
b.size()) - JacobiIterKernel(A, b, x, out_x, damping, r); -} - -template <class MatrixType> __global__ void ResiduumGPU(const MatrixType A, const ArrayBaseD b, const ArrayBaseD x, double * result) -{ - __shared__ float sdata[blockSize]; - const unsigned int tid = threadIdx.x; - sdata[tid] = 0; - int r = blockIdx.x*blockDim.x + threadIdx.x; - if (r >= A.num_rows()) return; - sdata[tid] = ResiduumKernel(A, b, x, r); - __syncthreads(); - for( unsigned int s = blockDim.x/2 ; s > 0 ; s >>= 1 ) - { - if( tid < s ) sdata[tid] += sdata[tid + s]; - __syncthreads(); - } - if( tid == 0 ) result[blockIdx.x] = sdata[0]; - //if( tid == 0 ) atomicAdd(result, sdata[0]); //Doesn't work -} -#endif - -template <class MatrixType> void JacobiIter(const MatrixType &A, const ArrayD &b, const ArrayD & x, ArrayD & out_x, const double damping=1) -{ - const int n = A.num_rows(); - assert(A.num_cols() == n) ; - assert(b.size() == n); - assert(x.size() >= n); - assert(out_x.size() >= n); - assert(b._onGPU == x._onGPU && x._onGPU == out_x._onGPU); -#if USE_CUDA - if (x._onGPU) - { - JacobiIterGPU <<< gridSize(n), blockSize >>> (A.toKernel(), b, x, out_x, damping); - } - else -#endif - { -#pragma omp parallel for schedule(static) - for (int r = 0; r < n; r++) - JacobiIterKernel(A, b, x, out_x, damping, r); - } -} - - -template <class MatrixType> __cuda_call__ double ResiduumKernel(const MatrixType &A, const ArrayBaseD & b, const ArrayBaseD & x, int r) -{ - double res = 0; - for (int i = 0; i < A.num_in_row(r); i++) - { - double aVal = 0; int c = r; - A.get_el_in_row(r,i, aVal, c); - res += aVal*x[c]; - } - res = b[r] - res; - return res*res; -} - -template <class MatrixType> double Residuum(const MatrixType &A, const ArrayD & b, const ArrayD & x) -{ - const int n = A.num_rows(); - assert(A.num_cols() == n) ; - assert(b.size() == n); - assert(x.size() >= n); - assert(b._onGPU == x._onGPU); - double res = 0; -#if USE_CUDA - if (x._onGPU) - { - #if 0 - static double *resGPU = 0; - if (resGPU==0) 
cudaMalloc(&resGPU, sizeof(double)); - cudaMemset(resGPU, 0, sizeof(double)); - ResiduumGPU<MatrixType> <<< gridSize(n), blockSize >>> (A, b, x, resGPU); - cudaDeviceSynchronize(); - cudaMemcpy(&res, resGPU, sizeof(double), cudaMemcpyDeviceToHost); - #else - ArrayD resids(gridSize(n), true); - resids.fill(0); - ResiduumGPU <<< gridSize(n), blockSize >>> (A.toKernel(), b, x, resids.data); - resids.moveToCPU(); - for (int i = 0; i < resids.size(); i++) res+=resids[i]; - #endif - } - else -#endif //USE_CUDA - { -#pragma omp parallel for reduction(+:res) schedule(static) - for (int r = 0; r < n; r++) - res+=ResiduumKernel(A, b, x, r); - } - return sqrt(res); -} -#endif diff --git a/src/TNL/legacy/incompressible-navier-stokes/base_structs.h b/src/TNL/legacy/incompressible-navier-stokes/base_structs.h deleted file mode 100644 index a60b4b2a666561273c518a6ac3c5c989907e666e..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/base_structs.h +++ /dev/null @@ -1,470 +0,0 @@ -#pragma once -#include <math.h> -#include <string.h> - -#if USE_CUDA - #include "base_cuda.h" -#else - void CudaCheckError() {} - #define __cuda_call__ -#endif - -typedef unsigned int uint; -template <class T> T abs(const T &x){return (x<0)? -x:x;} -template <class T> T max(T a, T b){return (a>=b)? a:b;} -template <class T> T max(T a, T b, T c){return max(a, max(a,b));} -template <class T> T min(T a, T b){return (a<=b)? a:b;} -template <class T> inline T square(T x){return x*x;} -template<class T> inline double clamp(T x){ return x<0 ? 0 : x>1 ? 
1 : x; } -template <class T> inline void clamp(T & val, T min, T max){if (val<min) val=min; if (val>max) val=max;} - -struct vec3 -{ - double x, y, z; - vec3(double x_=0, double y_=0, double z_=0){ x=x_; y=y_; z=z_; } - vec3& operator+=(const vec3 &b) { x += b.x; y += b.y; z += b.z; return *this; } - vec3& operator-=(const vec3 &b) { x -= b.x; y -= b.y; z -= b.z; return *this; } - vec3 operator+(const vec3 &b) const { return vec3(x+b.x,y+b.y,z+b.z); } - vec3 operator-(const vec3 &b) const { return vec3(x-b.x,y-b.y,z-b.z); } - vec3 operator*(double b) const { return vec3(x*b,y*b,z*b); } - vec3 mult(const vec3 &b) const { return vec3(x*b.x,y*b.y,z*b.z); } - double length() const{return sqrt(x*x+y*y+z*z);} - vec3& normalize(){ return *this = *this * (1/sqrt(x*x+y*y+z*z)); } - vec3 norm() const { vec3 res(*this); res.normalize(); return res; } - vec3& clamp(){ ::clamp<double>(x); ::clamp<double>(y); ::clamp<double>(z); return *this; } - double dot(const vec3 &b) const { return x*b.x+y*b.y+z*b.z; } - // cross: - vec3 operator%(const vec3 & b) const {return vec3(y*b.z-z*b.y,z*b.x-x*b.z,x*b.y-y*b.x);} - static double Dot(const vec3 & a, const vec3 & b){ return a.dot(b); } - static vec3 Cross(const vec3 & a, const vec3 & b){return a%b;} - static vec3 Mult (const vec3 & a, const vec3 & b){return a.mult(b);} -}; - -struct vec2i -{ - int x,y; - __cuda_call__ vec2i(){x=y=0;} - __cuda_call__ vec2i(int x, int y){this->x=x; this->y=y;} -}; - -template <class T> struct ArrayBase -{ -public: - - int w,h,d; //vector dimensions - T * data; - - __cuda_call__ ArrayBase (){w=h=d=0; data=0;} - __cuda_call__ ArrayBase (T *data, int w, int h, int d){this->data=data; this->w=w; this->h=h; this->d=d;} - __cuda_call__ T & operator [] (int i){return data[i];} - __cuda_call__ const T & operator [] (int i) const {return data[i];} - __cuda_call__ T & operator() (int x, int y) { return data[y*w+x]; } - __cuda_call__ const T & operator() (int x, int y) const { return data[y*w+x]; } - __cuda_call__ 
int size () const {return w*h*d;} - - __cuda_call__ int width() const {return w;} - __cuda_call__ int height() const {return h;} - __cuda_call__ int depth() const {return d;} - - __cuda_call__ int index(int x, int y) const {return y*w+x;} - __cuda_call__ vec2i index2D(int i) const { int y = i/w; return vec2i(i-y*w,y);} - operator T* (){return data;} - - void set(const ArrayBase<T>& arr) {set(arr.data, arr.w, arr.h, arr.d);} - void set(T *data, int w, int h, int d){this->data=data; this->w=w; this->h=h; this->d=d;} -}; -typedef ArrayBase<double> ArrayBaseD; - -template<typename T> -class Array : public ArrayBase<T> -{ -public: - - Array<T>* _bindedFrom; //If case this is only shared array, this points to the parent data array - bool _onGPU; - - Array(){ _bindedFrom = 0; _onGPU = false; } - Array(int size, bool onGPU = false){ this->_onGPU = onGPU; _bindedFrom = 0; resize1d(size); } - Array(const Array<T> & arr){throw "Not supported";} - ArrayBase<T> toArr() const {return ArrayBase<T>(this->data,this->w,this->h,this->d);} - bool onGPU()const { return _bindedFrom ? _bindedFrom->onGPU() : _onGPU; } - - void copy(const Array<T> & arr) - { - if (this->size() < arr.size()) throw "Array isn't big enough"; -#if USE_CUDA - cudaMemcpyKind copyKind = arr._onGPU? cudaMemcpyDeviceToHost : cudaMemcpyHostToHost; - if (_onGPU) copyKind = arr._onGPU? 
cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice; - cudaMemcpy(this->data, arr.data, arr.size()* sizeof(T), copyKind); -#else - memcpy(this->data, arr.data, arr.size()* sizeof(T)); -#endif - } - void fill (T val) - { -#if USE_CUDA - if (_onGPU) - FillGPU <<< gridSize(this->size()), blockSize >>> (this->data, val, this->size()); - else -#else - for (int i = this->size()-1; i >= 0; i--) this->data[i] = val; -#endif - } - void bind1d(Array<T> & arr, int offset, int size){ unbindOrFree(); this->set(&arr[offset], size, 1, 1); _bindedFrom = &arr; } - void unbindOrFree(){ if (!_bindedFrom) free(); this->data = 0; this->w = this->h = this->d = 0; _bindedFrom = 0; } - - - static T* Alloc(int size, bool onGPU) - { - T * res = 0; -#if USE_CUDA - if (onGPU) - cudaMalloc(&res, size*sizeof(T)); - else -#else - res = (size > 0) ? new T[size] : 0; -#endif - return res; - } - void free() - { - if (_bindedFrom) throw "Cant free not own data"; - if (this->data){ -#if USE_CUDA - if (this->_onGPU) - cudaFree(this->data); - else -#else - delete[] this->data; -#endif - } - this->data=0; this->w=this->h=this->d=0; _bindedFrom=0; - } - void resize(int newSize, bool leaveMore = false) - { - if ( (!leaveMore && this->size() != newSize) || this->size() < newSize) - { - free(); - this->data = Alloc(newSize, this->_onGPU); - _bindedFrom = 0; - } - this->w=newSize; this->h=this->d=1; - } - void resize1d(int size){resize(size); } - void clone(const Array<T> & arr, bool leaveMore, bool moveToGPU) - { - if ( (!leaveMore && arr.size()!=this->size()) || arr.size() > this->size() ) - { - free(); - this->_onGPU = moveToGPU; - resize(arr.size()); - } - else move(moveToGPU); - this->w=arr.w; this->h=arr.h; this->d=arr.d; - this->copy(arr); - } - void clone(const Array<T> & arr, bool leaveMore = false){clone(arr, leaveMore, arr._onGPU);} - ~Array(){ if (!_bindedFrom) free(); } - - void move(bool toGPU) - { -#if USE_CUDA - if (this->_onGPU == toGPU) return; - T* newData = Alloc(this->size(), toGPU); - 
cudaMemcpy(newData, this->data, this->size()* sizeof(T), toGPU? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost); - if (this->data){ - if (this->_onGPU) cudaFree(this->data); - else delete[] this->data; - } - this->data = newData; - this->_onGPU = toGPU; -#else - assert(false); //CPU only version, what do you want to move? -#endif - } - void moveToGPU(){move(true);} - void moveToCPU(){move(false);} - - void print(const char * name = 0) - { - Array<T> aux; - ArrayBase<T> vec = *this; - if (_onGPU) { aux.clone(*this, false, false); vec = aux; } - printf("Printing vector %s\n", (name) ? name : "Noname"); - for (int i = 0; i < this->size(); i++) - printf("%d:%.10f, ", i, (double)vec.data[i]); - printf("\n\n"); - fflush(stdout); - } - - /*T norm() - { - T res = 0; - #pragma omp parallel for reduction(+:res) schedule(static) - for (int i = 0; i < _size; i++) res+=square(_data[i]); - return sqrt(res); - } - - - void add(const Array<T> & vec, T mult) - { - Add(*this, vec, 1.0, mult, *this); - } - - static void Add(const Array<T> & a, const Array<T> & b, const T aMult, const T bMult, Array<T> & res) - { - if (a.size()!=b.size() || b.size() > res.size()) throw "Array::Add - array sizes differ"; - #pragma omp parallel for schedule(static) - for (int i = 0; i < res.size(); i++) - res[i]=aMult*a[i]+bMult*b[i]; - } - void Subtract(const Array<T> & a, const Array<T> & b, Array<T> & res) - { - if (a.size()!=b.size() || b.size() > res.size()) throw "Array::Subtract - array sizes differ"; - #pragma omp parallel for schedule(static) - for (int i = 0; i < b.size(); i++) - res[i]=a[i]-b[i]; - }*/ -}; - -typedef Array<double> ArrayD; -typedef Array<int> ArrayI; - -template <class T> -class Array2D: public Array<T> -{ -public: - void resize2d(int width, int height){if (width==this->w && height==this->h) return; this->resize(width*height); - this->w=width; this->h=height; this->d=1;} - void bind2d(Array<T> & arr, int offset, int width, int height){ - this->unbindOrFree(); - 
this->set(&arr.data[offset], width, height, 1); - this->_bindedFrom=&arr;} - void bind3d(Array<T> & arr, int offset, int width, int height, int depth){this->unbindOrFree(); - this->set(&arr[offset], width, height, depth); this->_bidnFrom=&arr;} - void clone(const Array2D<T>& arr) - { - Array<T>::clone(arr); - this->w = arr.width(); this->h=arr.height(); - } - void fillBorders(T val){ - if (this->_onGPU) throw "FillBorders isn't implemented on GPU yet."; - for (int x=0; x < this->w; x++) this->data[x] = this->data[(this->h-1)*this->w + x] = val; - for (int y=1; y < this->h-1; y++) this->data[y*this->w] = this->data[y*this->w + this->w-1] = val; - } -}; -typedef Array2D<double> arr2D; - - -struct GPUMatrix -{ - int _num_rows, _num_cols; - ArrayBaseD _vals; - ArrayBase<int> _cols, _rowStarts; - - __cuda_call__ int num_rows() const { return _num_rows; } - __cuda_call__ int num_cols() const { return _num_cols; } - __cuda_call__ int num_in_row(int row) const { return _rowStarts[row + 1] - _rowStarts[row]; } - __cuda_call__ inline void get_el_in_row(int row, int ind_in_row, double & out_val, int & out_col) const { - int i = _rowStarts[row] + ind_in_row; out_val = _vals[i]; out_col = _cols[i]; - } - inline double& get_val_in_row(int row, int ind_in_row){ return _vals[_rowStarts[row] + ind_in_row]; } - __cuda_call__ inline double get_val_in_row(int row, int ind_in_row) const { return _vals[_rowStarts[row] + ind_in_row]; } - __cuda_call__ inline int get_col_index(int row, int ind_in_row) const { return _cols[_rowStarts[row] + ind_in_row]; } - __cuda_call__ double get_diag(int row) const - { - for (int i = _rowStarts[row]; i < _rowStarts[row + 1]; i++) - if (_cols[i] == row) return _vals[i]; - return -1e100; - //throw "Diagonal element not found"; - } -}; - -class MatrixCSR -{ -public: - ArrayD _vals; - ArrayI _cols; - ArrayI _rowStarts; - int _num_rows, _num_cols; - - MatrixCSR(){_num_cols=_num_rows=0;} - MatrixCSR(const MatrixCSR & mat){throw "Copy constructor for MatrixCSR 
doesn't exist";} - __cuda_call__ int num_rows() const { return _num_rows; } - __cuda_call__ int num_cols() const { return _num_cols; } - __cuda_call__ int num_in_row(int row) const { return _rowStarts[row + 1] - _rowStarts[row]; } - GPUMatrix toKernel() const - { - assert(_vals.onGPU()); - GPUMatrix res; - res._num_rows = _num_rows; res._num_cols = _num_cols; - res._rowStarts = _rowStarts; - res._cols = _cols; res._vals = _vals; - return res; - } - void resize(int num_rows, int num_cols, int num_values) - { - if (_vals.size()!=num_values) - { - _vals.resize(num_values); - _cols.resize(num_values); - } - if (_rowStarts.size()!=num_rows+1) _rowStarts.resize(num_rows+1); - _rowStarts.fill(0); - _num_rows=num_rows; - _num_cols=num_cols; - } - void clear() - { - _vals.fill(0); - _cols.fill(0); - _rowStarts.fill(0); - } - - template <class MatrixType> - void copyVals(const MatrixType & matToClone) - { - for (int ri = 0; ri < _num_rows; ri++) - { - int nr = num_in_row(ri), rs = _rowStarts[ri]; - for (int i = 0; i < nr; i++) - _vals[rs + i] = matToClone.get_val_in_row(ri, i); - } - } - - template <class MatrixType> - void clone(const MatrixType & matToClone) - { - _num_rows = matToClone.num_rows(); - _num_cols = matToClone.num_cols(); - _rowStarts.resize(_num_rows+1); - _rowStarts[0] = 0; - for (int ri = 0; ri < _num_rows; ri++) - _rowStarts[ri+1] = _rowStarts[ri] + matToClone.num_in_row(ri); - - int nne = _rowStarts[_num_rows]; - _vals.resize(nne); - _cols.resize(nne); - for (int ri = 0; ri < _num_rows; ri++) - { - int nr = matToClone.num_in_row(ri), rs = _rowStarts[ri]; - for (int i = 0; i < nr; i++) - matToClone.get_el_in_row(ri,i,_vals[rs+i],_cols[rs+i]); - } - } - - inline double& operator()(int ri, int ci) - { - if(ri>=_num_rows || ci>=_num_cols) throw "MatrixCSR - Index out of bounds"; - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++ ) - if (_cols[i] == ci) return _vals[i]; - throw "MatrixCSR - Value not found"; - } - inline const double& operator()(int ri, 
int ci) const - { - if(ri>=_num_rows || ci>=_num_cols) throw "MatrixCSR - Index out of bounds"; - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++ ) - if (_cols[i] == ci) return _vals[i]; - throw "MatrixCSR - Value not found"; - } - inline bool isNull(int ri, int ci) const - { - if(ri>=_num_rows || ci>=_num_cols) throw "MatrixCSR - Index out of bounds"; - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++ ) - if (_cols[i] == ci) return false; - return true; - } - __cuda_call__ inline void get_el_in_row(int row, int ind_in_row, double & out_val, int & out_col) const { - int i = _rowStarts[row] + ind_in_row; out_val = _vals[i]; out_col=_cols[i]; - } - inline double& get_val_in_row(int row, int ind_in_row){return _vals[_rowStarts[row] + ind_in_row];} - __cuda_call__ inline double get_val_in_row(int row, int ind_in_row) const { return _vals[_rowStarts[row] + ind_in_row]; } - __cuda_call__ inline int get_col_index(int row, int ind_in_row) const { return _cols[_rowStarts[row] + ind_in_row]; } - __cuda_call__ double get_diag(int row) const - { - for (int i = _rowStarts[row]; i < _rowStarts[row + 1]; i++) - if (_cols[i] == row) return _vals[i]; - return -1e100; - //throw "Diagonal element not found"; - } - void loadMMMatrix(const char * filename); - void mult(const double * vec, double *out) const - { - #pragma omp parallel for schedule(static) - for (int ri = 0; ri < _num_rows; ri++) - { - out[ri] = 0; - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++) - out[ri] += _vals[i] * vec[_cols[i]]; - } - } - void mult(const ArrayD & vec, ArrayD & out) const - { - if (out.size()< _num_rows) throw "CSRMatrix::mult - out vector not big enough"; - if (vec.size()< _num_cols) throw "CSRMatrix::mult - vec vector not big enough"; - #pragma omp parallel for schedule(static) - for (int ri = 0; ri < _num_rows; ri++) - { - out[ri] = 0; - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++) - out[ri] += _vals[i] * vec[_cols[i]]; - } - } - - void print() const - { - 
printf("Matrix %d x %d with %d values\n", _num_rows, _num_cols, _vals.size()); fflush(stdout); - for (int ri = 0; ri < _num_rows; ri++) - { - printf("Row %d - ", ri); - for (int i = _rowStarts[ri]; i < _rowStarts[ri+1]; i++) - printf("%d:%f, ",_cols[i], _vals[i]); - printf("\n"); - } - fflush(stdout); - } - - void moveToGPU(){ _rowStarts.moveToGPU(); _cols.moveToGPU(); _vals.moveToGPU(); } - void moveToCPU(){ _rowStarts.moveToCPU(); _cols.moveToCPU(); _vals.moveToCPU(); } - - /*static void MatrixMult(const MatrixCSR & A, const MatrixCSR & B, MatrixCSR & res) - { - if (A._num_cols != B._num_rows) throw "CSRMatrix::MatrixMult - A.cols and b.rows don't agree"; - - intArr aux(B.num_cols()); aux.fill(-1); - res._rowStarts.resize(A.num_rows()); - for (int r = 0; r < A.num_rows(); r++) - { - int actCols=0; - for (int i = 0; i < A.num_in_row(r); i++) - { - int ri = A.get_col_index(r,i); - for (int j = 0; j < B.num_in_row(ri); j++) - { - int ci = B.get_col_index(ri,j); - if (aux[ci]!=r) - { - actCols++; - aux[ci] = r; - } - } - } - res._rowStarts[r+1] = res._rowStarts[r+1]+actCols; - } - - #pragma omp parallel for schedule(static) - for (int r = 0; r < A.num_rows(); r++) - { - for (int i = 0; i < A.num_in_row(r); i++) - { - int ri = A.get_col_index(r,i); - for (int j = 0; j < B.num_in_row(ri); j++) - { - int ci = B.get_col_index(ri,j); - double val = res(r,ci); - res(r,ci) = val + A.get_val_in_row(r,i)*B.get_val_in_row(ri,j); - } - } - } - }*/ -}; diff --git a/src/TNL/legacy/incompressible-navier-stokes/solver.h b/src/TNL/legacy/incompressible-navier-stokes/solver.h deleted file mode 100644 index b4d0403d4c1eae1175efddad2f4bbe16c54c93d1..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/solver.h +++ /dev/null @@ -1,523 +0,0 @@ -#include <chrono> - -#if 1 - #include "base.h" -#else - #include "lin_alg.h" -#endif -#include <assert.h> - -/*Vytvoreno dle: 
-http://www.leb.eei.uni-erlangen.de/winterakademie/2008/report/content/course01/pdf/0105.pdf -http://math.mit.edu/~gs/cse/codes/mit18086_navierstokes.pdf - -*/ - -__cuda_call__ inline double HorAvg (const ArrayBaseD & arr, int x, int y) { return 0.5*(arr(x,y) + arr(x+1,y)); } -__cuda_call__ inline double VerAvg (const ArrayBaseD & arr, int x, int y) { return 0.5*(arr(x,y) + arr(x,y+1)); } -__cuda_call__ inline double HorDiff(const ArrayBaseD & arr, int x, int y) { return arr(x+1,y) - arr(x,y); } -__cuda_call__ inline double VerDiff(const ArrayBaseD & arr, int x, int y) { return arr(x,y+1) - arr(x,y); } -__cuda_call__ inline bool IsBoundary(int x, int y, int w, int h){return x==0||y==0||x==w-1||y==h-1;} -__cuda_call__ inline bool IsBoundary(const ArrayBaseD & arr, int x, int y){ return x==0 || y==0 || x==arr.width()-1 || y==arr.height()-1;} -__cuda_call__ inline bool IsOut(const ArrayBaseD & arr, int x, int y){ return x < 0 || y < 0 || x >= arr.width() || y >= arr.height(); } - -class RegularMesh -{ - int N; - bool centerIsBoundary(int x, int y){ return x == 0 || y == 0 || x == N - 1 || y == N - 1; } - bool verFaceIsBoundary(int x, int y){ return x == 0 || y == 0 || x == N || y == N - 1; } - bool horFaceIsBoundary(int x, int y){ return x == 0 || y == 0 || x == N -1 || y == N; } - int center(int x, int y){return y*N + x;} - int verFace(int x, int y){ return y*(N + 1) + x; } - int horFace(int x, int y){ return y*N + x; } - int numCenters() { return N*N; } - int numVerFaces() { return (N + 1)*N; } - int numHorFaces() { return N*(N + 1); } - - vec2i verFaceLeftToCenter(int x, int y){ assert(x >= 0); return vec2i(x, y); } - vec2i verFaceRightToCenter(int x, int y){ assert(x < N); return vec2i(x + 1, y); } - vec2i horFaceUpToCenter(int x, int y){ assert(y >= 0); return vec2i(x, y); } - vec2i horFaceDownToCenter(int x, int y){ assert(y < N); return vec2i(x, y+1); } -}; - -class EmptyMatrix -{ - __cuda_call__ int num_rows() const {return 0;} - __cuda_call__ int num_cols() 
const {return 0;} - __cuda_call__ int num_in_row(int row) const {return 0;} - __cuda_call__ void get_el_in_row(int row, int ind_in_row, double & val, int &col) const {val=0; col=-1;} - __cuda_call__ double get_diag(int row) const {return 0;} - EmptyMatrix& toKernel() { return *this; } - const EmptyMatrix& toKernel() const { return *this; } -}; - -class IdentityMatrix -{ - int _size; -public: - IdentityMatrix() :_size(0){} - IdentityMatrix(int size) : _size(size){} - __cuda_call__ int num_rows() const { return _size; } - __cuda_call__ int num_cols() const { return _size; } - __cuda_call__ int num_in_row(int row) const { return 1; } - __cuda_call__ void get_el_in_row(int row, int ind_in_row, double & val, int &col) const { val = 1; col = row; } - __cuda_call__ double get_diag(int row) const { return 1; } - IdentityMatrix& toKernel() { return *this; } - const IdentityMatrix& toKernel() const { return *this; } -}; - -class SimpleMatrix2D -{ - ArrayBaseD _var; - double _diag; - double _off; -public: - SimpleMatrix2D(){_diag=1;_off=0;} - SimpleMatrix2D(Array2D<double>& var, double diag, double off){set(var,diag,off);} - void set(Array2D<double>& var, double diag, double off){_var.set(var); _diag=diag; _off=off;} - SimpleMatrix2D& toKernel() { return *this; } - const SimpleMatrix2D& toKernel() const { return *this; } - __cuda_call__ int num_rows() const {return _var.size();} - __cuda_call__ int num_cols() const {return _var.size();} - __cuda_call__ int num_in_row(int row) const { - vec2i coord = _var.index2D(row); - return IsBoundary(_var, coord.x, coord.y)? 
1 : 5; - } - __cuda_call__ void get_el_in_row(int row, int ind_in_row, double & val, int &col) const { - vec2i coord = _var.index2D(row); - //if (IsBoundary(_var, coord.x, coord.y)){ col = row; val = 1; return; } - int x=coord.x, y=coord.y, w = _var.width(); - { - if (x==0) {col = row+1; val = 1; return;} - else if (y==0) {col = row+w; val = 1; return;} - else if (x==_var.width()-1 ) {col = row-1; val = 1; return;} - else if (y==_var.height()-1) {col = row-w; val = 1; return;} - } - switch(ind_in_row) - { - case 0: val = _diag; col = row; break; - case 1: val = _off; col = row-1; break; - case 2: val = _off; col = row+1; break; - case 3: val = _off; col = row-_var.width(); break; - case 4: val = _off; col = row+_var.width(); break; - } - } -}; - -class AdvectDiffusionMatrix2D -{ -public: - ArrayBaseD u, v; - double visc, dt; - - AdvectDiffusionMatrix2D(){ visc = dt = 0; } - AdvectDiffusionMatrix2D(Array2D<double> &u, Array2D<double> &v, double visc, double dt){set(u,v,visc,dt);} - void set(Array2D<double> &u, Array2D<double> &v, double visc, double dt) - { - this->u.set(u); this->v.set(v); this->visc=visc; this->dt = dt; - } - AdvectDiffusionMatrix2D& toKernel() { return *this; } - const AdvectDiffusionMatrix2D& toKernel() const { return *this; } - - __cuda_call__ int num_rows() const {return u.size()+v.size();} - __cuda_call__ int num_cols() const {return u.size()+v.size();} - __cuda_call__ int num_in_row(int row) const { - const ArrayBaseD *act = row>=u.size()? &v : &u; - vec2i coord = act->index2D(row - (row>=u.size()? u.size() : 0)); - return IsBoundary(*act, coord.x, coord.y)? 1 : 5; - } - __cuda_call__ void get_el_in_row(int row, int ind_in_row, double & val, int &col) const - { - const ArrayBaseD *act = row>=u.size()? &v : &u; - vec2i coord = act->index2D(row - (row>=u.size()? 
u.size() : 0)); - int x=coord.x, y=coord.y, w = act->width(); - if (IsBoundary(*act,x,y)) {col = row; val = 1; return;} - - const double dx = 1.0/u.height(), dy=dx, vix = dt*visc/(dx*dx), viy=dt*visc/(dy*dy); - double cxm=0,cym=0,cxp=0,cyp=0; - if (act==&u) - { - cxm = -0.25*HorAvg(u,x-1,y)/dx; cxp = 0.25*HorAvg(u,x,y)/dx; - cym = -0.25*HorAvg(v,x-1,y)/dy; cyp = 0.25*HorAvg(v,x-1,y+1)/dy; - } - else - { - cxm = -0.25*VerAvg(u,x,y-1)/dx; cxp = 0.25*VerAvg(u,x+1,y-1)/dx; - cym = -0.25*VerAvg(v,x,y-1)/dy; cyp = 0.25*VerAvg(v,x,y)/dy; - } - switch(ind_in_row) - { - case 0: val = 1+dt*(cxm+cxp+cym+cyp)+2*vix+2*viy; col = row; break; //Diagonal element - case 1: val = dt*cxm-vix; col = row-1; break; - case 2: val = dt*cxp-vix; col = row+1; break; - case 3: val = dt*cym-viy; col = row-w; break; - case 4: val = dt*cyp-viy; col = row+w; break; - case 10: val = 1+2*dt*(cxm+cxp+cym+cyp); col =row; break; //special number for sum of whole row - } - } - __cuda_call__ double get_val_in_row(int row, int ind_in_row) const{ - double val; int col; - get_el_in_row(row, ind_in_row, val, col); - return val; - } - __cuda_call__ double get_diag(int row) const - { - double val; int col; - get_el_in_row(row, 0, val, col); - return val; - } -}; - -class AdvectModifPoisson -{ - const ArrayBaseD *u,*v, *p; - const AdvectDiffusionMatrix2D * adMat; -public: - AdvectModifPoisson(Array2D<double> *p, const AdvectDiffusionMatrix2D * adMat){ - this->p = p; - this->adMat = adMat; - this->u = &(adMat->u); - this->v = &(adMat->v); - } - __cuda_call__ int num_rows() const {return p->width()*p->height();} - __cuda_call__ int num_cols() const {return p->width()*p->height();} - __cuda_call__ int num_in_row(int row) const { - vec2i coord = p->index2D(row - (row>=u->size()? u->size() : 0)); - return IsBoundary(*p, coord.x, coord.y)? 1 : 5; - } - __cuda_call__ void get_el_in_row(int row, int ind_in_row, double & val, int &col) const - { - - vec2i coord = p->index2D(row - (row>=u->size()? 
u->size() : 0)); - int x=coord.x, y=coord.y, w = p->width(); - { - if (x==0) {col = row+1; val = 1; return;} - else if (y==0) {col = row+w; val = 1; return;} - else if (x==w-1) {col = row-1; val = 1; return;} - else if (y==p->height()-1) {col = row-w; val = 1; return;} - } - - const int elemInd = 0; - switch(ind_in_row) - { - case 0: - val = adMat->get_val_in_row( u->index(x-1,y-1), elemInd) + adMat->get_val_in_row( u->index(x, y-1), elemInd) + - adMat->get_val_in_row( v->index(x-1,y-1), elemInd) + adMat->get_val_in_row( v->index(x-1,y ), elemInd); - col = row; - break; - case 1: val = -adMat->get_val_in_row( u->index(x-1,y-1), elemInd); col = row-1; break; - case 2: val = -adMat->get_val_in_row( u->index(x, y-1), elemInd); col = row+1; break; - case 3: val = -adMat->get_val_in_row( v->index(x-1,y-1), elemInd); col = row-w; break; - case 4: val = -adMat->get_val_in_row( v->index(x-1,y ), elemInd); col = row+w; break; - } - } -}; - -#if USE_CUDA -__global__ void GPU_set_zero_neumann(ArrayBaseD a) -{ - int i = blockIdx.x*blockDim.x + threadIdx.x; - int ex = a.w-1, ey=a.h-1; - if (i < a.w-1) {a(i,0) = a(i,1); a(i,ey) = a(i,ey-1);} - if (i < a.h-1) {a(0,i) = a(1,i); a(ex,i) = a(ex-1,i);} - if (i==0) - { - a(0,0)=a(1,1); - a(ex,0)=a(ex-1,1); - a(0,ey)=a(1,ey-1); - a(ex,ey)=a(ex-1,ey-1); - } -} - -__global__ void GPU_set_bnd(ArrayBaseD a, int type) //type is same as enum vars 0=var_u, 1=var_v ... -{ - int i = blockIdx.x*blockDim.x + threadIdx.x; - int ex = a.w-1, ey=a.h-1; - double top = type==0? 
0.05 : 0; - if (i < a.w-1) {a(i,0) = 0; a(i,ey) = top;} - if (i < a.h-1) {a(0,i) = 0; a(ex,i) = 0;} - if (i==0) - { - a(0,0) = a(ex,0) = a(0,ey) = a(ex,ey) = 0; - } -} - -template <class MatrixType> -__global__ void GPU_pressure_correction(const int dir, double mult, const ArrayBaseD var, MatrixType mat, int indOff, ArrayBaseD res) //dir is 0 for X, or 1 for Y -{ - int x = blockIdx.x*blockDim.x + threadIdx.x, y = blockIdx.y*blockDim.y + threadIdx.y; - if (IsBoundary(var, x, y)) return; - - double aux = 0; - if (dir == 0) - aux += mult*(var(x+1,y+1) - var(x,y+1)); - else - aux += mult*(var(x+1,y+1) - var(x+1,y)); - int ind = res.index(x,y); - if (mat.num_rows() > 0) aux /= mat.get_diag(ind+indOff); - res[ind] += aux; -} -template <class MatrixType> -void Pressure_correction_GPU(double mult, Array2D<double> &u, Array2D<double> &v, Array2D<double> &p, MatrixType mat, Array<double> & res) -{ - ArrayBaseD pu, pv; - pu.set(res.data, u.w, u.h, u.d); - int uOff = u.size(); - pv.set(&(res.data[uOff]), u.w, u.h, u.d); - GPU_pressure_correction<MatrixType> <<< gridSize2D(u.w, u.h), blockSize2D >>> (0, mult, pu, mat, 0, res); - GPU_pressure_correction<MatrixType> <<< gridSize2D(v.w, v.h), blockSize2D >>> (0, mult, pv, mat, uOff, res); -} - -__global__ void GPU_calc_divergence(const ArrayBaseD u, const ArrayBaseD v, ArrayBaseD res, int N) -{ - int x = blockIdx.x*blockDim.x + threadIdx.x, y = blockIdx.y*blockDim.y + threadIdx.y; - if (IsOut(res, x, y)) return; - if (IsBoundary(res, x, y)) return; - res(x, y) = -0.5f*(u(x, y - 1) - u(x - 1, y - 1) + v(x - 1, y) - v(x - 1, y - 1)) / N; // -(u_x + v_y) -} - -template <class MatrixType> -__global__ void GPU_pressure_correction_u_part(const MatrixType mat, const ArrayBaseD u, const ArrayBaseD p, ArrayBaseD res, int N, int sign) -{ - int x = blockIdx.x*blockDim.x + threadIdx.x, y = blockIdx.y*blockDim.y + threadIdx.y; - if (IsOut(u, x, y)) return; - if (IsBoundary(u, x, y)) return; - - int ind = u.index(x, y); - res[ind] += 
sign*0.5f*N*(p(x + 1, y + 1) - p(x, y + 1)) / mat.get_diag(ind); -} -template <class MatrixType> -__global__ void GPU_pressure_correction_v_part(const MatrixType mat, const ArrayBaseD v, const ArrayBaseD p, ArrayBaseD res, int N, int sign, int vOff) -{ - int x = blockIdx.x*blockDim.x + threadIdx.x, y = blockIdx.y*blockDim.y + threadIdx.y; - if (IsOut(v, x, y)) return; - if (IsBoundary(v, x, y)) return; - - int ind = v.index(x, y) + vOff; - res[ind] += sign*0.5f*N*(p(x + 1, y + 1) - p(x + 1, y)) / mat.get_diag(ind); - -} -#endif //USE_CUDA - -void Calc_divergence(const Array2D<double> &u, const Array2D<double> &v, Array2D<double> &res, int N) -{ - assert(u.onGPU() == v.onGPU()); - assert(u.onGPU() == res.onGPU()); -#if USE_CUDA - if (res.onGPU()) - { - GPU_calc_divergence <<< gridSize2D(res.w, res.h), blockSize2D >>> (u, v, res, N); - } - else -#endif - { - for (int x = 1; x <= N; x++) for (int y = 1; y <= N; y++) { - res(x, y) = -0.5f*(u(x, y - 1) - u(x - 1, y - 1) + v(x - 1, y) - v(x - 1, y - 1)) / N; // -(u_x + v_y) - } - } -} - -template <class MatrixType> -static void pressureCorrectionWithA(const Array2D<double> & u, const Array2D<double> & v, const Array2D<double> & p, Array<double> & arr, int sign, - const MatrixType & mat) -{ - assert(arr.onGPU() == p.onGPU()); - int N = u.height(), vOff = u.size(); -#if USE_CUDA - if (arr.onGPU()) - { - GPU_pressure_correction_u_part <<< gridSize2D(u.w, u.h), blockSize2D >>> (mat.toKernel(), u, p, arr, N, sign); - GPU_pressure_correction_v_part <<< gridSize2D(v.w, v.h), blockSize2D >>> (mat.toKernel(), v, p, arr, N, sign, vOff); - return; - } -#endif - - for (int x = 1; x< u.width() - 1; x++) for (int y = 1; y< u.height() - 1; y++) - { - int ind = u.index(x, y); - arr[ind] += sign*0.5f*N*(p(x,y)-p(x-1,y))/mat.get_diag(ind); - } - for (int x = 1; x< v.width() - 1; x++) for (int y = 1; y< v.height() - 1; y++) - { - int ind = v.index(x, y) + vOff; - arr[ind] += sign*0.5f*N*(p(x,y)-p(x,y-1))/mat.get_diag(ind); - } -} - -class 
NSSolver -{ -public: - enum vars {var_u, var_v, var_p, var_d}; - int N; - //double diff, visc; - - Array<double> vels, vels0, aux, b; - Array2D<double> u, u0, v, v0, p, p0, pd, pd0; - AdvectDiffusionMatrix2D advectNoMat; - SimpleMatrix2D poissNoMat; - MatrixCSR advectMat, poissMat; - - NSSolver() - { - N=0; - } - - void init(int size) - { - N = size; - vels.resize(2*N*(N+1)); //for u,v - vels0.clone(vels); - aux.clone(vels); - b.clone(vels); b.fill(0); - - p.resize2d(N,N); p.fill(0); p0.clone(p); - pd.clone(p); pd0.clone(pd); - - rebind(); - reset(); - advectMat.clone(advectNoMat); - poissMat.clone(poissNoMat); - } - - void rebind() - { - u.bind2d(vels, 0, N + 1, N); v.bind2d(vels, N*(N + 1), N, N + 1); - u0.bind2d(vels0, 0, N + 1, N); v0.bind2d(vels0, N*(N + 1), N, N + 1); - advectNoMat.set(u, v, 0, 0); - poissNoMat.set(p, 4, -1); - } - - void reset() - { - u.fill(0); u0.fill(0); v.fill(0); v0.fill(0); p.fill(0); p0.fill(0); - set_bnd(var_u, u); set_bnd(var_u, u0); - set_bnd(var_v, v); set_bnd(var_v, v0); - set_bnd(var_p, p); set_bnd(var_p, p0); - } - - static void set_zero_neumann(arr2D & a) - { - int ex = a.width()-1, ey=a.height()-1; - for (int x=1; x < ex; x++) {a(x,0) = a(x,1); a(x,ey) = a(x,ey-1);} - for (int y=1; y < ey; y++) {a(0,y) = a(1,y); a(ex,y) = a(ex-1,y);} - a(0,0)=a(1,1); - a(ex,0)=a(ex-1,1); - a(0,ey)=a(1,ey-1); - a(ex,ey)=a(ex-1,ey-1); - } - - static void set_bnd ( vars b, arr2D & x ) - { -#if USE_CUDA - if (x.onGPU()) - { - int gs = gridSize(max(x.width(), x.height())); - if (b==var_p) - GPU_set_zero_neumann <<< gs, blockSize >>> (x); - else - GPU_set_bnd <<< gs, blockSize >>> (x, (int) b); - } - else -#endif - { - if (b==var_p) {set_zero_neumann(x); return;} - x.fillBorders(0); - if (b==var_u) for (int i=1 ; i<x.width()-1 ; i++ ) {x(i,x.height()-1) = 0.05;} - } - } - - template <class MatrixType> - void calcPressure(Array2D<double> & u, Array2D<double> & v, Array2D<double> & p, Array2D<double> & p0, const MatrixType &adMat) - { - p0.fill(0); 
p.fill(0); - int N = u.height(); - Calc_divergence(u, v, p0, N); - set_bnd ( var_p, p0 ); //zero neumann bnd. cond. - - for (int i = 0; i < 20; i++) - { - JacobiIter(poissNoMat,p0,p,aux); JacobiIter(poissNoMat,p0,aux,p); - set_bnd ( var_p, p ); - } - CudaCheckError(); - - /*for (int i = 0; i < p.size(); i++) - { - double val = p[i]; - if (val != p[i]) - throw "Error: There is a NaN in pressure."; - }*/ - } - /*static void pressureCorrection(const Array2D<double> & u, const Array2D<double> & v, const Array2D<double> & p, Array<double> & arr) - { - int N = u.height(), vOff = u.size(); - for ( int x=1 ; x< u.width()-1 ; x++ ) for (int y=1 ; y< u.height()-1 ; y++ ) - arr[u.index(x,y)] -= 0.5f*N*(p(x+1,y+1)-p(x,y+1)); - for ( int x=1 ; x< v.width()-1 ; x++ ) for (int y=1 ; y< v.height()-1 ; y++ ) - arr[v.index(x,y)+vOff] -= 0.5f*N*(p(x+1,y+1)-p(x+1,y)); - }*/ - - /*static void pressureCorrectionWithA2(const Array2D<double> & u, const Array2D<double> & v, const Array2D<double> & p, Array<double> & arr, double sign, - const AdvectDiffusionMatrix2D mat) - { - int N = u.height(), vOff = u.size(); - for ( int x=1 ; x< u.width()-1 ; x++ ) for (int y=1 ; y< u.height()-1 ; y++ ) - { - int ind = u.index(x,y); - arr[ind] += sign*0.5f*N*(p(x+1,y+1)-p(x,y+1))/mat.get_val_in_row(ind, 10); - } - for ( int x=1 ; x< v.width()-1 ; x++ ) for (int y=1 ; y< v.height()-1 ; y++ ) - { - int ind = v.index(x,y)+vOff; - arr[ind] += sign*0.5f*N*(p(x+1,y+1)-p(x+1,y))/mat.get_val_in_row(ind, 10); - } - }*/ - - static void createRHS(const Array<double> & vels0, const Array2D<double> & u, const Array2D<double> & v, const Array2D<double> & p, Array<double> & b) - { - b.copy(vels0); - pressureCorrectionWithA(u, v, p, b, -1, IdentityMatrix(b.size())); - //pressureCorrection(u,v,p,b); - } - - void prepareAdvectMat(double visc, double dt) - { - advectNoMat.set(u, v, visc, dt); - advectMat.copyVals(advectNoMat); - } - - void solveAdvectMat(int iter, double damping) - { - for (int i = 0; i < iter; i++) - { 
- JacobiIter(advectMat, b, vels, aux, damping); - JacobiIter(advectMat, b, aux, vels, damping); - - } - } - - void simulate_velocity(double visc, double dt) - { - vels0.copy(vels); p0.copy(p); - createRHS(vels0, u, v, p, b); - double residuum = 1e10; - int count=0; - prepareAdvectMat(visc,dt); - auto & matToUse = advectMat; - - for (int i = 0; i < 10; i++) - { - - solveAdvectMat(5, 0.7); - set_bnd ( var_u, u ); set_bnd ( var_v, v ); - pressureCorrectionWithA(u, v, p, vels, 1, matToUse); - calcPressure(u, v, p, p0, matToUse); - pressureCorrectionWithA(u, v, p, vels, -1, matToUse); - - createRHS(vels0, u, v, p, b); - residuum = Residuum(matToUse, b, vels); - CudaCheckError(); - count++; - } - } -}; diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cpp b/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cpp deleted file mode 100644 index ce15dfa47e129b953d2f9466e416a83047ce148b..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/*************************************************************************** - tnl-incompressible-navier-stokes.cpp - description - ------------------- - begin : Jan 28, 2015 - copyright : (C) 2015 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "tnl-incompressible-navier-stokes.h" diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cu b/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cu deleted file mode 100644 index b5cf257ca907bb0e375c8d78da55712514dceb15..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.cu +++ /dev/null @@ -1,18 +0,0 @@ -/*************************************************************************** - tnl-incompressible-navier-stokes.cu - description - ------------------- - begin : Jan 28, 2015 - copyright : (C) 2015 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#include "tnl-incompressible-navier-stokes.h" \ No newline at end of file diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.h b/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.h deleted file mode 100644 index 4e7edefc3aa62cf45defd35a08ec3503371445d1..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnl-incompressible-navier-stokes.h +++ /dev/null @@ -1,98 +0,0 @@ -/*************************************************************************** - tnl-incompressible-navier-stokes.h - description - ------------------- - begin : Jan 28, 2015 - copyright : (C) 2015 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNL_INCOMPRESSIBLE_NAVIER_STOKES_H_ -#define TNL_INCOMPRESSIBLE_NAVIER_STOKES_H_ - -#include <solvers/tnlSolver.h> -#include <operators/diffusion/tnlLinearDiffusion.h> -#include "tnlIncompressibleNavierStokesProblem.h" -#include "tnlNSFastBuildConfig.h" - -//typedef tnlDefaultConfigTag BuildConfig; -typedef tnlNSFastBuildConfig BuildConfig; - -template< typename ConfigTag > -class tnlIncompressibleNavierStokesConfig -{ - public: - static void configSetup( tnlConfigDescription& config ) - { - config.addDelimiter( "Incompressible Navier-Stokes solver settings:" ); - config.addEntry< double >( "viscosity", "Viscosity of the diffusion." 
); - config.addEntry< double >( "inletVelocity", "Maximal X velocity on the inlet." ); - - /*config.addEntry< tnlString >( "boundary-conditions-type", "Choose the boundary conditions type.", "dirichlet"); - config.addEntryEnum< tnlString >( "dirichlet" ); - config.addEntryEnum< tnlString >( "neumann" ); - - config.addEntry< tnlString >( "boundary-conditions-file", "File with the values of the boundary conditions.", "boundary.tnl" ); - config.addEntry< double >( "boundary-conditions-constant", "This sets a value in case of the constant boundary conditions." ); - config.addEntry< double >( "right-hand-side-constant", "This sets a constant value for the right-hand side.", 0.0 ); - config.addEntry< tnlString >( "initial-condition", "File with the initial condition.", "initial.tnl");*/ - } -}; - -template< typename Mesh, typename Real = typename Mesh::RealType, typename Index = typename Mesh::IndexType > -class tnlINSBoundaryConditions{}; - -template< typename Mesh, typename Real = typename Mesh::RealType, typename Index = typename Mesh::IndexType > -class tnlINSRightHandSide{}; - -template< typename Mesh, typename Real = typename Mesh::RealType, typename Index = typename Mesh::IndexType > -class tnlIncompressibleNavierStokes -{ - public: - typedef Real RealType; - typedef typename Mesh::DeviceType DeviceType; - typedef Index IndexType; -}; - - -template< typename Real, typename Device, typename Index, typename MeshType, typename ConfigTag, typename SolverStarter > -class tnlIncompressibleNavierStokesSetter -{ -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - - typedef tnlStaticVector< MeshType::Dimensions, Real > Vertex; - - static bool run( const tnlParameterContainer& parameters ) - { - enum { Dimensions = MeshType::Dimensions }; - typedef tnlStaticVector < MeshType::Dimensions, Real > Vertex; - - typedef tnlINSBoundaryConditions< MeshType > BoundaryConditions; - typedef tnlIncompressibleNavierStokes< MeshType > 
ApproximateOperator; - typedef tnlINSRightHandSide< MeshType > RightHandSide; - typedef tnlIncompressibleNavierStokesProblem< MeshType, BoundaryConditions, RightHandSide, ApproximateOperator > Solver; - SolverStarter solverStarter; - return solverStarter.template run< Solver >( parameters ); - } -}; - -int main( int argc, char* argv[] ) -{ - tnlSolver< tnlIncompressibleNavierStokesSetter, tnlIncompressibleNavierStokesConfig, BuildConfig > solver; - if( ! solver. run( argc, argv ) ) - return EXIT_FAILURE; - return EXIT_SUCCESS; -} - -#endif /* TNL_INCOMPRESSIBLE_NAVIER_STOKES_H_ */ diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper.h b/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper.h deleted file mode 100644 index 3bdf51298738a2bfcc9d272547ce91cc779e7489..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper.h +++ /dev/null @@ -1,117 +0,0 @@ -/*************************************************************************** - tnlExplicitINSTimeStepper.h - description - ------------------- - begin : Feb 17, 2015 - copyright : (C) 2015 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_H_ -#define EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_H_ - -template< typename Problem, - typename LinearSolver > -class tnlExplicitINSTimeStepper -{ - public: - - typedef Problem ProblemType; - typedef typename Problem::RealType RealType; - typedef typename Problem::DeviceType DeviceType; - typedef typename Problem::IndexType IndexType; - typedef typename Problem::MeshType MeshType; - typedef typename ProblemType::DofVectorType DofVectorType; - - tnlExplicitINSTimeStepper(): problem(0), timeStep(0) {} - - static void configSetup( tnlConfigDescription& config, const tnlString& prefix = "" ) - { - config.addEntry< bool >( "verbose", "Verbose mode.", true ); - } - - bool setup( const tnlParameterContainer& parameters, - const tnlString& prefix = "" ) - { - //this->verbose = parameters.getParameter< bool >( "verbose" ); - return true; - } - - bool init( const MeshType& mesh ) - { - /*cout << "Setting up the linear system..."; - if( ! this->problem->setupLinearSystem( mesh, this->matrix ) ) - return false; - cout << " [ OK ]" << endl; - if( this->matrix.getRows() == 0 || this->matrix.getColumns() == 0 ) - { - cerr << "The matrix for the semi-implicit time stepping was not set correctly." << endl; - if( ! this->matrix.getRows() ) - cerr << "The matrix dimensions are set to 0 rows." << endl; - if( ! this->matrix.getColumns() ) - cerr << "The matrix dimensions are set to 0 columns." << endl; - cerr << "Please check the method 'setupLinearSystem' in your solver." << endl; - return false; - } - if( ! 
this->rightHandSide.setSize( this->matrix.getRows() ) ) - return false;*/ - return true; - } - - void setProblem( ProblemType& problem ) {this -> problem = &problem;} - ProblemType* getProblem() const {return this -> problem;} - - bool setTimeStep( const RealType& timeStep ) - { - if( timeStep <= 0.0 ) - { - cerr << "Time step for tnlExplicitINSTimeStepper must be positive. " << endl; - return false; - } - this->timeStep = timeStep; - return true; - } - - const RealType& getTimeStep() const; - - bool solve( const RealType& time, - const RealType& stopTime, - const MeshType& mesh, - DofVectorType& dofVector, - DofVectorType& auxiliaryDofVector ) - { - tnlAssert( this->problem != 0, ); - RealType t = time; - while( t < stopTime ) - { - RealType currentTau = Min( this->timeStep, stopTime - t ); - currentTau = 0.005; - - this->problem->doStep(currentTau,mesh); - - t += currentTau; - } - return true; - } - - bool writeEpilog( tnlLogger& logger ) const { return true; } - - protected: - - Problem* problem; - //LinearSolver _matSolver; - RealType timeStep; - -}; - -#include "tnlExplicitINSTimeStepper_impl.h" - -#endif /* EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_H_ */ diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper_impl.h b/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper_impl.h deleted file mode 100644 index 5cde25f226334bd6f8427aaee025c2b672a89780..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnlExplicitINSTimeStepper_impl.h +++ /dev/null @@ -1,23 +0,0 @@ -/*************************************************************************** - tnlExplicitINSTimeStepper_impl.h - description - ------------------- - begin : Feb 17, 2015 - copyright : (C) 2015 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - 
-/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_IMPL_H_ -#define EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_IMPL_H_ - - - -#endif /* EXAMPLES_INCOMPRESSIBLE_NAVIER_STOKES_TNLEXPLICITINSTIMESTEPPER_IMPL_H_ */ diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem.h b/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem.h deleted file mode 100644 index cb4495c2898ff346e713d1563c4fc0be1a439199..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem.h +++ /dev/null @@ -1,489 +0,0 @@ -/*************************************************************************** - tnlIncompressibleNavierStokesProblem.h - description - ------------------- - begin : Feb 23, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_H_ -#define TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_H_ - -#include <mesh/tnlGrid2D.h> -#include <problems/tnlPDEProblem.h> -#include <operators/diffusion/tnlLinearDiffusion.h> -#include <core/arrays/tnlStaticArray.h> -#include <solvers/pde/tnlLinearSystemAssembler.h> -#include <solvers/linear/stationary/tnlJacobiSolver.h> -#include <operators/tnlAnalyticNeumannBoundaryConditions.h> -#include <functors/tnlConstantFunction.h> -#include <solvers/pde/tnlNoTimeDiscretisation.h> -#include <matrices/tnlEllpackMatrix.h> -#include "tnlExplicitINSTimeStepper.h" -#include "solver.h" - -template<class T> T square(const T & val){return val*val;} - -template< typename Mesh, - typename BoundaryCondition, - typename RightHandSide, - typename DifferentialOperator > -class tnlIncompressibleNavierStokesProblem : public tnlPDEProblem< Mesh, - typename DifferentialOperator::RealType, - typename Mesh::DeviceType, - typename DifferentialOperator::IndexType > -{ - -public: - typedef typename DifferentialOperator::RealType RealType; - typedef typename Mesh::DeviceType DeviceType; - typedef typename DifferentialOperator::IndexType IndexType; - typedef tnlIncompressibleNavierStokesProblem< Mesh, BoundaryCondition, RightHandSide, DifferentialOperator > ThisType; - typedef tnlPDEProblem< Mesh, RealType, DeviceType, IndexType > BaseType; - using typename BaseType::MeshType; - //typedef tnlGrid<2, RealType, tnlHostDevice, IndexType> MeshType; - using typename BaseType::DofVectorType; - - typedef tnlEllpackMatrix< RealType, tnlHost, IndexType > MatrixType; - typedef tnlJacobiSolver<MatrixType> LinearSolver; - typedef tnlExplicitINSTimeStepper< ThisType, LinearSolver > TimeStepper; - typedef typename MeshType::CoordinatesType CoordinatesType; - - enum { Dimensions = Mesh::Dimensions }; - -protected: - NSSolver validator; - RealType visc, upVelocity; - MatrixType 
poissonMat, advectDiffuseMat; - - DofVectorType vel, vel0, vel_aux, vel_rhs, p, p_rhs; - - -public: - - static tnlString getTypeStatic() {return tnlString( "tnlNSProblem< " ) + Mesh :: getTypeStatic() + " >";} - - tnlString getPrologHeader() const{return tnlString( "NS equation" );} - - void writeProlog( tnlLogger& logger, - const tnlParameterContainer& parameters ) const {} - - bool setup( const tnlParameterContainer& parameters ){ - visc = parameters.getParameter< RealType >( "viscosity" ); - /*if( ! this->boundaryCondition.setup( parameters, "boundary-conditions-" ) || - ! this->rightHandSide.setup( parameters, "right-hand-side-" ) ) - return false;*/ - return true; - } - - void preparePoisson(const MeshType& mesh, MatrixType& matrix ) const - { - IndexType nx = mesh.getDimensions().x(), ny = mesh.getDimensions().y(), n = nx*ny; - typename MatrixType::CompressedRowsLengthsVector rowLenghts; - rowLenghts.setSize(n); - for (IndexType y = 0; y < ny; y++) for (IndexType x = 0; x < nx; x++) - rowLenghts[mesh.getCellIndex(CoordinatesType(x,y))] = mesh.isBoundaryCell(CoordinatesType(x,y))? 
1 : 5; - matrix.setDimensions(n,n); - matrix.setCompressedRowsLengths(rowLenghts); - for (IndexType y = 0; y < ny; y++) for (IndexType x = 0; x < nx; x++) - { - IndexType row = mesh.getCellIndex(CoordinatesType(x,y)); - - if (x==0) {matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x+1,y)), 1.0); continue;} - else if (y==0) {matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x,y+1)), 1.0); continue;} - else if (x==nx-1 ) {matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x-1,y)), 1.0); continue;} - else if (y==ny-1) {matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x,y-1)), 1.0); continue;} - - matrix.setElement(row, row, 4); - matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x+1,y)), -1); - matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x-1,y)), -1); - matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x,y+1)), -1); - matrix.setElement(row, mesh.getCellIndex(CoordinatesType(x,y-1)), -1); - } - } - - bool setInitialCondition( const tnlParameterContainer& parameters, - const MeshType& mesh, - DofVectorType& dofs, - DofVectorType& auxDofs ) - { - vel.setSize(mesh.getNumberOfFaces()); - vel0.setSize(vel.getSize()); - vel_aux.setSize(vel.getSize()); - vel_rhs.setSize(vel.getSize()); - p.setSize(mesh.getNumberOfCells()); - p_rhs.setSize(mesh.getNumberOfCells()); - validator.init(sqrt(mesh.getNumberOfCells())); - - vel.setValue(0); vel0.setValue(0); - p.setValue(0); p_rhs.setValue(0); - - upVelocity = parameters.getParameter< RealType >( "inletVelocity" ); - upVelocity = 1; - - //Prepare diffusion matrix pattern - typename MatrixType::CompressedRowsLengthsVector rowLenghts; - rowLenghts.setSize(mesh.getNumberOfFaces()); - for (int i = 0; i < rowLenghts.getSize(); i++) - rowLenghts[i] = num_in_row(mesh, i); - advectDiffuseMat.setDimensions(mesh.getNumberOfFaces(), mesh.getNumberOfFaces()); - advectDiffuseMat.setCompressedRowsLengths(rowLenghts); - - preparePoisson(mesh, poissonMat); - - SetBnd(mesh); - return true; - } - 
- //template< typename MatrixType > - bool setupLinearSystem( const MeshType& mesh, MatrixType& matrix ){/*NO*/} - - bool makeSnapshot( const RealType& time, - const IndexType& step, - const MeshType& mesh, - DofVectorType& dofs, - DofVectorType& auxDofs ) - { - cout << endl << "Writing output at time " << time << " step " << step << "." << endl; - - //this->bindAuxiliaryDofs( mesh, auxiliaryDofs ); - //cout << "dofs = " << dofs << endl; - tnlString fileName; - FileNameBaseNumberEnding( "u-", step, 5, ".vtk", fileName ); - save("test.txt", mesh); - //if( ! this->solution.save( fileName ) ) - // return false; - return true; - } - - IndexType getDofs( const MeshType& mesh ) const {return mesh.getNumberOfFaces();} - void bindDofs( const MeshType& mesh, DofVectorType& dofVector ) {} - - void getExplicitRHS( const RealType& time, const RealType& tau, const MeshType& mesh, DofVectorType& _u, DofVectorType& _fu ) {/*NO*/} - - //template< typename MatrixType > - void assemblyLinearSystem( const RealType& time, - const RealType& tau, - const MeshType& mesh, - DofVectorType& dofs, - DofVectorType& auxDofs, - MatrixType& matrix, - DofVectorType& rightHandSide ) {/*NO*/} - - void set_zero_neumann(tnlSharedVector< RealType, DeviceType, IndexType > & vec) - { - /*int ex = a.width()-1, ey=a.height()-1; - for (int x=1; x < ex; x++) {a(x,0) = a(x,1); a(x,ey) = a(x,ey-1);} - for (int y=1; y < ey; y++) {a(0,y) = a(1,y); a(ex,y) = a(ex-1,y);} - a(0,0)=0.5*(a(0,1)+a(1,0)); - a(ex,0)=0.5*(a(ex-1,0)+a(ex,1)); - a(0,ey)=0.5*(a(1,ey)+a(0,ey-1)); - a(ex,ey)=0.5*(a(ex-1,ey)+a(ex,ey-1));*/ - } - - void SetBnd(const MeshType& mesh) - { - for (int i = 1; i < mesh.getDimensions().x(); i++) - { - IndexType ind = mesh.template getFaceIndex<1,0>(CoordinatesType(i, mesh.getDimensions().y() - 1)); - vel0[ind] = vel[ind] = 0.05; - } - } - - double getCenterU(const MeshType& mesh, IndexType cell) //x,y based on cells - { - return 0.5*(vel0[mesh.template getFaceNextToCell<-1,0>(cell)] + 
vel0[mesh.template getFaceNextToCell<+1,0>(cell)] ); - } - double getCenterV(const MeshType& mesh, IndexType cell) //x,y based on cells - { - return 0.5*(vel0[mesh.template getFaceNextToCell<0,-1>(cell)] + vel0[mesh.template getFaceNextToCell<0,+1>(cell)] ); - } - double getCrossU(const MeshType& mesh, int x, int y) //x,y based (n+1)*(n+1) - { - const CoordinatesType cellCoords(x,y); - const CoordinatesType downCoords(x,y-1); - return 0.5*(vel0[mesh.template getFaceNextToCell<-1,0>(mesh.getCellIndex(cellCoords))] - +vel0[mesh.template getFaceNextToCell<-1,0>(mesh.getCellIndex(downCoords))]); - } - double getCrossV(const MeshType& mesh, int x, int y) //x,y based (n+1)*(n+1) - { - const CoordinatesType cellCoords(x,y); - const CoordinatesType leftCoords(x-1,y); - return 0.5*(vel0[mesh.template getFaceNextToCell<0,-1>(mesh.getCellIndex(cellCoords))] - +vel0[mesh.template getFaceNextToCell<0,-1>(mesh.getCellIndex(leftCoords))]); - } - - RealType HorAvgXFace(const MeshType& mesh,const DofVectorType & val, IndexType x, IndexType y) const - { - IndexType i1 = mesh.template getFaceIndex<1,0>(CoordinatesType(x,y)) , i2 = mesh.template getFaceIndex<1,0>(CoordinatesType(x+1,y)); - return 0.5*(val[i1] + val[i2]); - } - RealType VerAvgXFace(const MeshType& mesh,const DofVectorType & val, IndexType x, IndexType y) const - { - IndexType i1 = mesh.template getFaceIndex<1,0>(CoordinatesType(x,y)) , i2 = mesh.template getFaceIndex<1,0>(CoordinatesType(x,y+1)); - return 0.5*(val[i1] + val[i2]); - } - RealType HorAvgYFace(const MeshType& mesh,const DofVectorType & val, IndexType x, IndexType y) const - { - IndexType i1 = mesh.template getFaceIndex<0,1>(CoordinatesType(x,y)) , i2 = mesh.template getFaceIndex<0,1>(CoordinatesType(x+1,y)); - return 0.5*(val[i1] + val[i2]); - } - RealType VerAvgYFace(const MeshType& mesh,const DofVectorType & val, IndexType x, IndexType y) const - { - IndexType i1 = mesh.template getFaceIndex<0,1>(CoordinatesType(x,y)) , i2 = mesh.template 
getFaceIndex<0,1>(CoordinatesType(x,y+1)); - return 0.5*(val[i1] + val[i2]); - } - - int num_in_row(const MeshType& mesh, int row) const { - IndexType fx, fy; - CoordinatesType coord = mesh.getFaceCoordinates(row, fx, fy); - if ((fx && mesh.template isBoundaryFace<1,0>(coord)) || (fy && mesh.template isBoundaryFace<0,1>(coord))) - return 1; - return 5; - } - void get_el_in_row(const MeshType& mesh, const DofVectorType & uv, IndexType row, IndexType ind_in_row, RealType dt, RealType & val, IndexType &col) const - { - IndexType fx, fy; - CoordinatesType coord = mesh.getFaceCoordinates(row, fx, fy); - int x = coord.x(), y = coord.y(); - if ((fx && mesh.template isBoundaryFace<1,0>(coord)) || (fy && mesh.template isBoundaryFace<0,1>(coord))) - {col = row; val = 1; return;} - - IndexType nx = mesh.getDimensions().x(), ny = mesh.getDimensions().y(); - const RealType dx = 1.0/nx, dy=1.0/ny, vix = dt*visc/(dx*dx), viy=dt*visc/(dy*dy); - RealType cxm=0,cym=0,cxp=0,cyp=0; - if (fx) - { - cxm = -0.25*HorAvgXFace(mesh, uv, x-1, y)/dx; cxp = 0.25*HorAvgXFace(mesh, uv, x, y)/dx; - cym = -0.25*HorAvgYFace(mesh, uv, x-1, y)/dy; cyp = 0.25*HorAvgYFace(mesh, uv, x-1, y+1)/dy; - } - else - { - cxm = -0.25*VerAvgXFace(mesh, uv, x, y-1)/dx; cxp = 0.25*VerAvgXFace(mesh, uv, x+1, y-1)/dx; - cym = -0.25*VerAvgYFace(mesh, uv, x, y-1)/dy; cyp = 0.25*VerAvgYFace(mesh, uv, x, y)/dy; - } - - CoordinatesType colCoord; - switch(ind_in_row) - { - case 0: val = 1+dt*(cxm+cxp+cym+cyp)+2*vix+2*viy; colCoord = coord; break; - case 1: val = dt*cxm-vix; colCoord = CoordinatesType(x-1,y); break; - case 2: val = dt*cxp-vix; colCoord = CoordinatesType(x+1,y); break; - case 3: val = dt*cym-viy; colCoord = CoordinatesType(x,y-1); break; - case 4: val = dt*cyp-viy; colCoord = CoordinatesType(x,y+1); break; - case 10: val = 1+2*dt*(cxm+cxp+cym+cyp); colCoord = coord; break; //special number for sum of whole row - } - if (fx) col = mesh.template getFaceIndex<1,0>(colCoord); - else col = mesh.template 
getFaceIndex<0,1>(colCoord); - } - - void pressureCorrectionWithA(const MeshType& mesh, DofVectorType& x, RealType sign, MatrixType* mat) - { - IndexType fx,fy; - IndexType nx = mesh.template getNumberOfFaces< 1,0 >(), ny = mesh.template getNumberOfFaces< 0,1 >(); - RealType invDx = mesh.getDimensions().x(), invDy = mesh.getDimensions().y(); - for (int i = 0; i < nx; i++) - { - if (mesh.template isBoundaryFace<1,0>(mesh.getFaceCoordinates(i, fx, fy))) continue; - RealType add = sign*0.5*invDx*(p[mesh.template getCellNextToFace<1,0>(i)] - p[mesh.template getCellNextToFace<-1,0>(i)]); - if (mat != NULL) add /= mat->getElement(i,i); - x[i] += add; - } - for (int i = nx; i < nx+ny; i++) - { - if (mesh.template isBoundaryFace<0,1>(mesh.getFaceCoordinates(i, fx, fy))) continue; - RealType add = sign*0.5*invDy*(p[mesh.template getCellNextToFace<0,1>(i)] - p[mesh.template getCellNextToFace<0,-1>(i)]); - if (mat != NULL) add /= mat->getElement(i,i); - x[i] += add; - } - } - - void createRHS(const MeshType& mesh, DofVectorType& b, RealType sign) - { - b = vel0; - pressureCorrectionWithA(mesh, b, sign, NULL); - } - - static bool checkMatrices(const MatrixType& tnlMat, const MatrixCSR& myMat) - { - if (tnlMat.getRows() != myMat.num_rows()) throw "Different number of rows"; - if (tnlMat.getColumns() != myMat.num_cols()) throw "Different number of cols"; - for (int r = 0; r < tnlMat.getRows(); r++) - { - //const typename MatrixType::MatrixRow & row = tnlMat.getRow(r); - //if (row.length != myMat.num_in_row(r)) - // throw "Different number of cells in row"; - - for (int i = 0; i < myMat.num_in_row(r); i++) - { - int col = myMat.get_col_index(r,i), col2 = col; - double val = tnlMat.getElement(r, col), val2 = myMat.get_val_in_row(r, i); - if (col!=col2) - throw "Column indeces are different"; - if (!Equal(val,val2)) - throw "Values are different"; - } - } - } - static bool checkVectors(const DofVectorType& tnlVec, const ArrayD& myVec) - { - if (tnlVec.getSize() != myVec.size()) 
throw "Different vector size"; - for (int i = 0; i < myVec.size(); i++) - { - double a = tnlVec[i], b = myVec[i]; - if (!Equal(a,b)) - throw "Different"; - } - return true; - } - - static void JacobiIter(const MatrixType& matrix, const DofVectorType& b, DofVectorType& x, DofVectorType & aux, RealType omega) - { - IndexType size = matrix.getRows(); - for( IndexType row = 0; row < size; row ++ ) - matrix.performJacobiIteration( b, row, x, aux, omega ); - for( IndexType row = 0; row < size; row ++ ) - matrix.performJacobiIteration( b, row, aux, x, omega ); - } - - void solveAdvectMat(int iter, double omega) - { - for (int i = 0; i < iter; i++) - JacobiIter(advectDiffuseMat, vel_rhs, vel, vel_aux, omega); - } - - void prepareAdvectDiffMat(const MeshType& mesh, RealType dt) - { - validator.prepareAdvectMat(visc, dt); - checkVectors(vel, validator.vels); - - RealType val; - IndexType col; - for (int row = 0; row < advectDiffuseMat.getRows(); row++) - for (int i = 0; i < num_in_row(mesh, row); i++) - { - get_el_in_row(mesh, vel, row, i, dt, val, col); - advectDiffuseMat.setElement(row, col, val); - } - createRHS(mesh, vel_rhs, -1); - validator.createRHS(validator.vels0,validator.u, validator.v, validator.p,validator.b); - vel_aux = vel; - validator.aux.copy(validator.vels); - - checkMatrices(advectDiffuseMat, validator.advectMat); - checkMatrices(poissonMat, validator.poissMat); - checkVectors(vel_rhs, validator.b); - checkVectors(vel_aux, validator.aux); - - int iter = 1; double omega = 0.7; - validator.solveAdvectMat(iter, omega); - solveAdvectMat(iter, omega); - - checkVectors(vel, validator.vels); - iter++; - } - - void doStep(RealType dt, const MeshType& mesh) - { - prepareAdvectDiffMat(mesh, dt); - } - - void computeVelocityDivergence(IndexType cell, const tnlVector<RealType, DeviceType, IndexType> & v, const MeshType& mesh, tnlVector<RealType, DeviceType, IndexType> & rhs) - { - double diffU = v[mesh.template getFaceNextToCell<1,0>(cell)] - v[mesh.template 
getFaceNextToCell<-1,0>(cell)]; - double diffV = v[mesh.template getFaceNextToCell<0,1>(cell)] - v[mesh.template getFaceNextToCell<0,-1>(cell)]; - rhs[cell] = -0.5f*(diffU/mesh.getDimensions().x() + diffV/mesh.getDimensions().y()); // -(u_x + v_y) - } - void updateVelocityByPressureCorrection(IndexType cell, const tnlVector<RealType, DeviceType, IndexType> & v, const MeshType& mesh, tnlVector<RealType, DeviceType, IndexType> & p) - { - RealType pVal = p[cell]; - double nx =mesh.getDimensions().x(), ny=mesh.getDimensions().y(); - vel[mesh.template getFaceNextToCell<-1,0>(cell)] -= 0.5*nx*pVal; - vel[mesh.template getFaceNextToCell<+1,0>(cell)] += 0.5*nx*pVal; - vel[mesh.template getFaceNextToCell<0,-1>(cell)] -= 0.5*ny*pVal; - vel[mesh.template getFaceNextToCell<0,+1>(cell)] += 0.5*ny*pVal; - } - - void project(const MeshType& mesh) - { - typedef tnlConstantFunction< Dimensions, RealType > ConstantFunction; - typedef tnlLinearDiffusion< MeshType, RealType, IndexType> LinDiffOper; - typedef tnlAnalyticNeumannBoundaryConditions< MeshType, ConstantFunction, RealType, IndexType > BoundaryConditions; - - tnlLinearSystemAssembler< MeshType, - tnlVector<RealType, DeviceType, IndexType>, - LinDiffOper, - BoundaryConditions, - ConstantFunction, - tnlNoTimeDiscretisation, - MatrixType > systemAssembler; - LinDiffOper linDiffOper; - BoundaryConditions boundaryConditions; - ConstantFunction zeroFunc; - - systemAssembler.template assembly< Mesh::Dimensions >( (RealType)0, - (RealType)0, - mesh, - linDiffOper, - boundaryConditions, - zeroFunc, //rhs func - p, - poissonMat, - p_rhs ); - - //_matSolver.setMatrix(poissonMat); - //_matSolver.solve(p_rhs,p); - int nx = mesh.getDimensions().x(), ny=mesh.getDimensions().y(); - for ( int i=0 ; i< nx; i++ ) for (int j=0 ; j< ny; j++ ) - computeVelocityDivergence(mesh.getCellIndex(CoordinatesType( i, j )), vel, mesh, p_rhs); - - for (int x=1; x< nx-1; x++) for(int y=1; y < ny-1; y++) - 
updateVelocityByPressureCorrection(mesh.getCellIndex(CoordinatesType(x,y)),vel, mesh, p); - } - - void save(const char * filename, const MeshType& mesh) - { - //FILE * pFile = fopen (filename, "w"); - //fprintf(pFile, "#X Y u v\n"); - int nx = mesh.getDimensions().x(), ny=mesh.getDimensions().y(), n=nx*ny; - int dims[] = {nx,ny,1}; - double *vars = new double[n*3]; - double *vvars[] = {vars}; - - int varDim[] = {3}; - int centering[] = {0}; - const char * names[] = {"Rychlost"}; - - for (IndexType j=0 ; j< ny ; j++ ) for ( IndexType i=0 ; i< nx ; i++ ) - { - IndexType cell = mesh.getCellIndex(typename MeshType::CoordinatesType(i,j)); - int ii = 3*(j*nx+i); - vars[ii+0] = getCenterU(mesh, cell); - vars[ii+1] = getCenterV(mesh, cell); - vars[ii+2] = 0; - //fprintf(pFile, "%lg %lg %lg %lg\n", (RealType)i, (RealType)j, getCenterU(mesh, cell), getCenterV(mesh, cell)); - } - //fclose (pFile); - void write_regular_mesh(const char *filename, int useBinary, int *dims, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars); - write_regular_mesh(filename, 0, dims, 1, varDim, centering, names, vvars ); - delete[] vars; - } -}; - -#include "tnlIncompressibleNavierStokesProblem_impl.h" - -#endif /* TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_H_ */ - - -//Refaktor, do objektu, setup na parametry, laplace podle tnlLinearDiffusion diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem_impl.h b/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem_impl.h deleted file mode 100644 index 995c404b67e7db8c7095bf3db3b7e4ffe2ed57b1..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnlIncompressibleNavierStokesProblem_impl.h +++ /dev/null @@ -1,40 +0,0 @@ -/*************************************************************************** - tnlIncompressibleNavierStokesProblem_impl.h - description - ------------------- - begin : Mar 10, 2013 - copyright : (C) 2013 
by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_IMPL_H_ -#define TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_IMPL_H_ - -#include <core/mfilename.h> -#include <matrices/tnlMatrixSetter.h> -#include <matrices/tnlMultidiagonalMatrixSetter.h> -#include <core/tnlLogger.h> -#include <solvers/pde/tnlExplicitUpdater.h> -#include <solvers/pde/tnlLinearSystemAssembler.h> -#include <solvers/pde/tnlBackwardTimeDiscretisation.h> - - - - - - - - - - - - -#endif /* TNLINCOMPRESSIBLENAVIERSTOKESPROBLEM_IMPL_H_ */ diff --git a/src/TNL/legacy/incompressible-navier-stokes/tnlNSFastBuildConfig.h b/src/TNL/legacy/incompressible-navier-stokes/tnlNSFastBuildConfig.h deleted file mode 100644 index d683048bfd40f6522eb26b6fa3ab84218249ff61..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/tnlNSFastBuildConfig.h +++ /dev/null @@ -1,69 +0,0 @@ -/*************************************************************************** - tnlNSFastBuildConfig.h - description - ------------------- - begin : Jul 7, 2014 - copyright : (C) 2014 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the 
Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLNSFASTBUILDCONFIG_H_ -#define TNLNSFASTBUILDCONFIG_H_ - -class tnlNSFastBuildConfig -{ - public: - - static void print() { cerr << "tnlNSFastBuildConfig" << endl; } -}; - -/**** - * Turn off support for float and long double. - */ -template<> struct tnlConfigTagReal< tnlNSFastBuildConfig, float > { enum { enabled = false }; }; -template<> struct tnlConfigTagReal< tnlNSFastBuildConfig, long double > { enum { enabled = false }; }; - -/**** - * Turn off support for short int and long int indexing. - */ -template<> struct tnlConfigTagIndex< tnlNSFastBuildConfig, short int >{ enum { enabled = false }; }; -template<> struct tnlConfigTagIndex< tnlNSFastBuildConfig, long int >{ enum { enabled = false }; }; - -/**** - * 1, 2, and 3 dimensions are enabled by default - */ -template<> struct tnlConfigTagDimensions< tnlNSFastBuildConfig, 1 >{ enum { enabled = false }; }; -template<> struct tnlConfigTagDimensions< tnlNSFastBuildConfig, 2 >{ enum { enabled = true }; }; -template<> struct tnlConfigTagDimensions< tnlNSFastBuildConfig, 3 >{ enum { enabled = false }; }; - -/**** - * Use of tnlGrid is enabled for allowed dimensions and Real, Device and Index types. - */ -template< int Dimensions, typename Real, typename Device, typename Index > - struct tnlConfigTagMesh< tnlNSFastBuildConfig, tnlGrid< Dimensions, Real, Device, Index > > - { enum { enabled = tnlConfigTagDimensions< tnlNSFastBuildConfig, Dimensions >::enabled && - tnlConfigTagReal< tnlNSFastBuildConfig, Real >::enabled && - tnlConfigTagDevice< tnlNSFastBuildConfig, Device >::enabled && - tnlConfigTagIndex< tnlNSFastBuildConfig, Index >::enabled }; }; - -/**** - * Please, chose your preferred time discretisation here. 
- */ -template<> struct tnlConfigTagTimeDiscretisation< tnlNSFastBuildConfig, tnlExplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< tnlNSFastBuildConfig, tnlSemiImplicitTimeDiscretisationTag >{ enum { enabled = true }; }; -template<> struct tnlConfigTagTimeDiscretisation< tnlNSFastBuildConfig, tnlImplicitTimeDiscretisationTag >{ enum { enabled = false }; }; - -/**** - * Only the Runge-Kutta-Merson solver is enabled by default. - */ -template<> struct tnlConfigTagExplicitSolver< tnlNSFastBuildConfig, tnlExplicitEulerSolverTag >{ enum { enabled = false }; }; - -#endif /* TNLNSFASTBUILDCONFIG_H_ */ diff --git a/src/TNL/legacy/incompressible-navier-stokes/visit_writer.cpp b/src/TNL/legacy/incompressible-navier-stokes/visit_writer.cpp deleted file mode 100644 index c27478eacd5849a2cd2c8425007c5c27cff3977a..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/visit_writer.cpp +++ /dev/null @@ -1,1066 +0,0 @@ -/* ************************************************************************* // -// visit_writer.c // -// ************************************************************************* */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "visit_writer.h" //mozna uvozovky - - -/* - * Globals. - */ - -static FILE *fp = NULL; -static int useBinary = 0; -static int numInColumn = 0; - - -/* **************************************************************************** - * Function: end_line - * - * Purpose: - * If floats or ints have been written using the write_float or write_int - * functions, this will issue a newline (if necessary) so that a new - * heading can be placed. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void end_line(void) -{ - if (!useBinary) - { - char str2[8] = "\n"; - fprintf(fp, str2); - numInColumn = 0; - } -} - - -/* **************************************************************************** - * Function: open_file - * - * Purpose: - * Opens a file for writing and assigns the handle to the global variable - * "fp". - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void open_file(const char *filename) -{ - char full_filename[1024]; - if (strstr(filename, ".vtk") != NULL) - { - strcpy(full_filename, filename); - } - else - { - sprintf(full_filename, "%s.vtk", filename); - } - - fp = fopen(full_filename, "w+"); -} - - -/* **************************************************************************** - * Function: close_file - * - * Purpose: - * Closes the file with handle "fp" (a global variable). - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void close_file(void) -{ - end_line(); - fclose(fp); - fp = NULL; -} - - -/* **************************************************************************** - * Function: force_big_endian - * - * Purpose: - * Determines if the machine is little-endian. If so, then, for binary - * data, it will force the data to be big-endian. - * - * Note: This assumes that all inputs are 4 bytes long. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void force_big_endian(unsigned char *bytes) -{ - static int doneTest = 0; - static int shouldSwap = 0; - if (!doneTest) - { - int tmp1 = 1; - unsigned char *tmp2 = (unsigned char *) &tmp1; - if (*tmp2 != 0) - shouldSwap = 1; - doneTest = 1; - } - - if (shouldSwap & useBinary) - { - unsigned char tmp = bytes[0]; - bytes[0] = bytes[3]; - bytes[3] = tmp; - tmp = bytes[1]; - bytes[1] = bytes[2]; - bytes[2] = tmp; - } -} - -/* **************************************************************************** - * Function: force_double_big_endian - * - * Purpose: - * Determines if the machine is little-endian. If so, then, for binary - * data, it will force the data to be big-endian. - * - * Note: This assumes that all inputs are 8 bytes long. - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void force_double_big_endian(unsigned char *bytes) -{ - static int doneTest = 0; - static int shouldSwap = 0; - if (!doneTest) - { - int tmp1 = 1; - unsigned char *tmp2 = (unsigned char *) &tmp1; - if (*tmp2 != 0) - shouldSwap = 1; - doneTest = 1; - } - - if (shouldSwap & useBinary) - { - unsigned char tmp = bytes[0]; - bytes[0] = bytes[7]; - bytes[7] = tmp; - tmp = bytes[1]; - bytes[1] = bytes[6]; - bytes[6] = tmp; - tmp = bytes[2]; - bytes[2] = bytes[5]; - bytes[5] = tmp; - tmp = bytes[3]; - bytes[3] = bytes[4]; - bytes[4] = tmp; - } -} - - -/* **************************************************************************** - * Function: write_string - * - * Purpose: - * Writes a character to the open file. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void write_string(const char *str) -{ - fprintf(fp, str); -} - - -/* **************************************************************************** - * Function: new_section - * - * Purpose: - * Adds a new line, provided we didn't already just do so and we are - * writing an ASCII file. - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void new_section(void) -{ - if (numInColumn != 0) - end_line(); - numInColumn = 0; -} - - -/* **************************************************************************** - * Function: write_int - * - * Purpose: - * Writes an integer to the currently open file. This routine takes - * care of ASCII vs binary issues. - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void write_int(int val) -{ - if (useBinary) - { - force_big_endian((unsigned char *) &val); - fwrite(&val, sizeof(int), 1, fp); - } - else - { - char str[128]; - sprintf(str, "%d ", val); - fprintf(fp, str); - if (((numInColumn++) % 9) == 8) - { - char str2[8] = "\n"; - fprintf(fp, str2); - numInColumn = 0; - } - } -} - - -/* **************************************************************************** - * Function: write_float - * - * Purpose: - * Writes an float to the currently open file. This routine takes - * care of ASCII vs binary issues. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * Modifications: - * - * Hank Childs, Fri Apr 22 09:14:44 PDT 2005 - * Make precision changes suggested by Jeff McAninch - * - * ************************************************************************* */ - -static void write_float(float val) -{ - if (useBinary) - { - force_big_endian((unsigned char *) &val); - fwrite(&val, sizeof(float), 1, fp); - } - else - { - char str[128]; - sprintf(str, "%20.12e ", val); - fprintf(fp, str); - if (((numInColumn++) % 9) == 8) - { - end_line(); - } - } -} -/* **************************************************************************** - * Function: write_double - * - * Purpose: - * Writes a double to the currently open file. This routine takes - * care of ASCII vs binary issues. - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * Modifications: - * - * Hank Childs, Fri Apr 22 09:14:44 PDT 2005 - * Make precision changes suggested by Jeff McAninch - * - * ************************************************************************* */ - -static void write_double(double val) -{ - if (useBinary) - { - force_double_big_endian((unsigned char *) &val); - fwrite(&val, sizeof(double), 1, fp); - } - else - { - char str[128]; - sprintf(str, "%20.12e ", val); - fprintf(fp, str); - if (((numInColumn++) % 9) == 8) - { - end_line(); - } - } -} - - -/* **************************************************************************** - * Function: write_header - * - * Purpose: - * Writes the standard VTK header to the file. This should be the first - * thing written to the file. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static void write_header(void) -{ - fprintf(fp, "# vtk DataFile Version 2.0\n"); - fprintf(fp, "Written using VisIt writer\n"); - if (useBinary) - fprintf(fp, "BINARY\n"); - else - fprintf(fp, "ASCII\n"); -} - - -/* **************************************************************************** - * Function: write_variables - * - * Purpose: - * Writes the variables to the file. This can be a bit tricky. The - * cell data must be written first, followed by the point data. When - * writing the [point|cell] data, one variable must be declared the - * primary scalar and another the primary vector (provided scalars - * or vectors exist). The rest of the arrays are added through the - * "field data" mechanism. Field data should support groups of arrays - * with different numbers of components (ie a scalar and a vector), but - * there is a failure with the VTK reader. So the scalars are all written - * one group of field data and then the vectors as another. If you don't - * write it this way, the vectors do not show up. - * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -void write_variables(int nvars, int *vardim, int *centering, - const char * const * varname, double **vars, - int npts, int ncells) -{ - char str[1024]; - int i, j, first_scalar, first_vector; - int num_scalars, num_vectors; - int num_field = 0; - - new_section(); - sprintf(str, "CELL_DATA %d\n", ncells); - write_string(str); - - first_scalar = 0; - first_vector = 0; - num_scalars = 0; - num_vectors = 0; - /* The field data is where the non-primary scalars and vectors are - * stored. They must all be grouped together at the end of the point - * data. So write out the primary scalars and vectors first. 
- */ - for (i = 0 ; i < nvars ; i++) - { - if (centering[i] == 0) - { - int num_to_write = 0; - int should_write = 0; - - if (vardim[i] == 1) - { - if (first_scalar == 0) - { - should_write = 1; - sprintf(str, "SCALARS %s double\n", varname[i]); - write_string(str); - write_string("LOOKUP_TABLE default\n"); - first_scalar = 1; - } - else - num_scalars++; - } - else if (vardim[i] == 3) - { - if (first_vector == 0) - { - should_write = 1; - sprintf(str, "VECTORS %s double\n", varname[i]); - write_string(str); - first_vector = 1; - } - else - num_vectors++; - } - else - { - printf("Only supported variable dimensions are 1 and 3.\n"); - printf("Ignoring variable %s.\n", varname[i]); - continue; - } - - if (should_write) - { - num_to_write = ncells*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - write_double(vars[i][j]); - } - end_line(); - } - } - } - - first_scalar = 0; - if (num_scalars > 0) - { - sprintf(str, "FIELD FieldData %d\n", num_scalars); - write_string(str); - for (i = 0 ; i < nvars ; i++) - { - int should_write = 0; - if (centering[i] == 0) - { - if (vardim[i] == 1) - { - if (first_scalar == 0) - { - first_scalar = 1; - } - else - { - should_write = 1; - sprintf(str, "%s 1 %d double\n", varname[i], ncells); - write_string(str); - } - } - } - - if (should_write) - { - int num_to_write = ncells*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - write_double(vars[i][j]); - } - end_line(); - } - } - } - - first_vector = 0; - if (num_vectors > 0) - { - sprintf(str, "FIELD FieldData %d\n", num_vectors); - write_string(str); - for (i = 0 ; i < nvars ; i++) - { - int should_write = 0; - if (centering[i] == 0) - { - int num_to_write = 0; - - if (vardim[i] == 3) - { - if (first_vector == 0) - { - first_vector = 1; - } - else - { - should_write = 1; - sprintf(str, "%s 3 %d double\n", varname[i], ncells); - write_string(str); - } - } - } - - if (should_write) - { - int num_to_write = ncells*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - 
write_double(vars[i][j]); - } - end_line(); - } - } - } - - new_section(); - sprintf(str, "POINT_DATA %d\n", npts); - write_string(str); - - first_scalar = 0; - first_vector = 0; - num_scalars = 0; - num_vectors = 0; - /* The field data is where the non-primary scalars and vectors are - * stored. They must all be grouped together at the end of the point - * data. So write out the primary scalars and vectors first. - */ - for (i = 0 ; i < nvars ; i++) - { - if (centering[i] != 0) - { - int num_to_write = 0; - int should_write = 0; - - if (vardim[i] == 1) - { - if (first_scalar == 0) - { - should_write = 1; - sprintf(str, "SCALARS %s double\n", varname[i]); - write_string(str); - write_string("LOOKUP_TABLE default\n"); - first_scalar = 1; - } - else - num_scalars++; - } - else if (vardim[i] == 3) - { - if (first_vector == 0) - { - should_write = 1; - sprintf(str, "VECTORS %s double\n", varname[i]); - write_string(str); - first_vector = 1; - } - else - num_vectors++; - } - else - { - printf("Only supported variable dimensions are 1 and 3.\n"); - printf("Ignoring variable %s.\n", varname[i]); - continue; - } - - if (should_write) - { - num_to_write = npts*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - write_double(vars[i][j]); - } - end_line(); - } - } - } - - first_scalar = 0; - if (num_scalars > 0) - { - sprintf(str, "FIELD FieldData %d\n", num_scalars); - write_string(str); - for (i = 0 ; i < nvars ; i++) - { - int should_write = 0; - if (centering[i] != 0) - { - if (vardim[i] == 1) - { - if (first_scalar == 0) - { - first_scalar = 1; - } - else - { - should_write = 1; - sprintf(str, "%s 1 %d double\n", varname[i], npts); - write_string(str); - } - } - } - - if (should_write) - { - int num_to_write = npts*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - write_double(vars[i][j]); - } - end_line(); - } - } - } - - first_vector = 0; - if (num_vectors > 0) - { - sprintf(str, "FIELD FieldData %d\n", num_vectors); - write_string(str); - for (i = 0 ; i < 
nvars ; i++) - { - int should_write = 0; - if (centering[i] != 0) - { - int num_to_write = 0; - - if (vardim[i] == 3) - { - if (first_vector == 0) - { - first_vector = 1; - } - else - { - should_write = 1; - sprintf(str, "%s 3 %d double\n", varname[i], npts); - write_string(str); - } - } - } - - if (should_write) - { - int num_to_write = npts*vardim[i]; - for (j = 0 ; j < num_to_write ; j++) - { - write_double(vars[i][j]); - } - end_line(); - } - } - } -} - - -/* **************************************************************************** -// Function: write_point_mesh -// -// Purpose: -// Writes out a point mesh. -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// npts The number of points in the mesh. -// pts The spatial locations of the points. This array should -// be size 3*npts. The points should be encoded as: -// <x1, y1, z1, x2, y2, z2, ..., xn, yn, zn> -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_point_mesh(const char *filename, int ub, int npts, float *pts, - int nvars, int *vardim, const char * const *varnames, - double **vars) -{ - int i; - char str[128]; - int *centering = NULL; - - useBinary = ub; - open_file(filename); - write_header(); - - write_string("DATASET UNSTRUCTURED_GRID\n"); - sprintf(str, "POINTS %d float\n", npts); - write_string(str); - for (i = 0 ; i < 3*npts ; i++) - { - write_float(pts[i]); - } - - new_section(); - sprintf(str, "CELLS %d %d\n", npts, 2*npts); - write_string(str); - for (i = 0 ; i < npts ; i++) - { - write_int(1); - write_int(i); - end_line(); - } - - new_section(); - sprintf(str, "CELL_TYPES %d\n", npts); - write_string(str); - for (i = 0 ; i < npts ; i++) - { - write_int(VISIT_VERTEX); - end_line(); - } - - centering = (int *) malloc(nvars*sizeof(int)); - for (i = 0 ; i < nvars ; i++) - centering[i] = 1; - write_variables(nvars, vardim, centering, varnames, vars, npts, npts); - free(centering); - - close_file(); -} - - -/* **************************************************************************** - * Function: num_points_for_cell - * - * Purpose: - * Determines the number of points for the type of cell. 
- * - * Programmer: Hank Childs - * Creation: September 3, 2004 - * - * ************************************************************************* */ - -static int num_points_for_cell(int celltype) -{ - int npts = 0; - switch (celltype) - { - case VISIT_VERTEX: - npts = 1; - break; - case VISIT_LINE: - npts = 2; - break; - case VISIT_TRIANGLE: - npts = 3; - break; - case VISIT_QUAD: - npts = 4; - break; - case VISIT_TETRA: - npts = 4; - break; - case VISIT_HEXAHEDRON: - npts = 8; - break; - case VISIT_WEDGE: - npts = 6; - break; - case VISIT_PYRAMID: - npts = 5; - break; - } - return npts; -} - - -/* **************************************************************************** -// Function: write_unstructured_mesh -// -// Purpose: -// Writes out a unstructured mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// npts The number of points in the mesh. -// pts The spatial locations of the points. This array should -// be size 3*npts. The points should be encoded as: -// <x1, y1, z1, x2, y2, z2, ..., xn, yn, zn> -// ncells The number of cells. -// celltypes The type of each cell. -// conn The connectivity array. -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_unstructured_mesh(const char *filename, int ub, int npts, - float *pts, int ncells, int *celltypes, int *conn, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars) -{ - int i, j; - char str[128]; - int conn_size = 0; - int *curr_conn = conn; - - useBinary = ub; - open_file(filename); - write_header(); - - write_string("DATASET UNSTRUCTURED_GRID\n"); - sprintf(str, "POINTS %d float\n", npts); - write_string(str); - for (i = 0 ; i < 3*npts ; i++) - { - write_float(pts[i]); - } - - new_section(); - for (i = 0 ; i < ncells ; i++) - { - int npts = num_points_for_cell(celltypes[i]); - - conn_size += npts+1; - } - sprintf(str, "CELLS %d %d\n", ncells, conn_size); - write_string(str); - for (i = 0 ; i < ncells ; i++) - { - int npts = num_points_for_cell(celltypes[i]); - write_int(npts); - for (j = 0 ; j < npts ; j++) - write_int(*curr_conn++); - end_line(); - } - - new_section(); - sprintf(str, "CELL_TYPES %d\n", ncells); - write_string(str); - for (i = 0 ; i < ncells ; i++) - { - write_int(celltypes[i]); - end_line(); - } - - write_variables(nvars, vardim, centering, varnames, vars, npts, ncells); - - close_file(); -} - - -/* **************************************************************************** -// Function: write_rectilinear_mesh -// -// Purpose: -// Writes out a rectilinear mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nX, nY, nZ }, where nX is the -// number of points in the X-dimension, etc. -// x An array of size dims[0] that contains the x-coordinates. -// y An array of size dims[1] that contains the x-coordinates. -// z An array of size dims[2] that contains the x-coordinates. 
-// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. -// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// Modifications: -// -// Hank Childs, Wed Apr 6 16:22:57 PDT 2005 -// Fix problem with 2D structured meshes and assessing cell count. -// -// ***************************************************************************/ - -void write_rectilinear_mesh(const char *filename, int ub, int *dims, - float *x, float *y, float *z, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars) -{ - int i, j; - char str[128]; - int npts = dims[0]*dims[1]*dims[2]; - int ncX = (dims[0] - 1 < 1 ? 1 : dims[0] - 1); - int ncY = (dims[1] - 1 < 1 ? 1 : dims[1] - 1); - int ncZ = (dims[2] - 1 < 1 ? 
1 : dims[2] - 1); - int ncells = ncX*ncY*ncZ; - - useBinary = ub; - open_file(filename); - write_header(); - - write_string("DATASET RECTILINEAR_GRID\n"); - sprintf(str, "DIMENSIONS %d %d %d\n", dims[0], dims[1], dims[2]); - write_string(str); - sprintf(str, "X_COORDINATES %d float\n", dims[0]); - write_string(str); - for (i = 0 ; i < dims[0] ; i++) - write_float(x[i]); - new_section(); - sprintf(str, "Y_COORDINATES %d float\n", dims[1]); - write_string(str); - for (i = 0 ; i < dims[1] ; i++) - write_float(y[i]); - new_section(); - sprintf(str, "Z_COORDINATES %d float\n", dims[2]); - write_string(str); - for (i = 0 ; i < dims[2] ; i++) - write_float(z[i]); - - write_variables(nvars, vardim, centering, varnames, vars, npts, ncells); - - close_file(); -} - - -/* **************************************************************************** -// Function: write_regular_mesh -// -// Purpose: -// Writes out a regular mesh. A regular mesh is one where the data lies -// along regular intervals. "Brick of bytes/doubles", -// "Block of bytes/double", and MRI data all are examples of data that -// lie on regular meshes. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nX, nY, nZ }, where nX is the -// number of points in the X-dimension, etc. -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_regular_mesh(const char *filename, int ub, int *dims, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars) -{ - int i; - - float *x = (float *) malloc(sizeof(float)*dims[0]); - float *y = (float *) malloc(sizeof(float)*dims[1]); - float *z = (float *) malloc(sizeof(float)*dims[2]); - - for (i = 0 ; i < dims[0] ; i++) - x[i] = (float) i; - for (i = 0 ; i < dims[1] ; i++) - y[i] = (float) i; - for (i = 0 ; i < dims[2] ; i++) - z[i] = (float) i; - - write_rectilinear_mesh(filename, ub, dims, x, y, z, nvars, vardim, - centering, varnames, vars); - - free(x); - free(y); - free(z); -} - - -/* **************************************************************************** -// Function: write_curvilinear_mesh -// -// Purpose: -// Writes out a curvilinear mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nI, nJ, nK }, where nI is the -// number of points in the logical I dimension, etc. -// pts An array of size nI*nJ*nK*3. The array should be layed -// out as (pt(i=0,j=0,k=0), pt(i=1,j=0,k=0), ... -// pt(i=nI-1,j=0,k=0), pt(i=0,j=1,k=0), ...). -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// Modifications: -// -// Hank Childs, Wed Apr 6 16:22:57 PDT 2005 -// Fix problem with 2D structured meshes and assessing cell count. -// -// ***************************************************************************/ - -void write_curvilinear_mesh(const char *filename, int ub, int *dims,float *pts, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars) -{ - int i, j; - char str[128]; - int npts = dims[0]*dims[1]*dims[2]; - int ncX = (dims[0] - 1 < 1 ? 1 : dims[0] - 1); - int ncY = (dims[1] - 1 < 1 ? 1 : dims[1] - 1); - int ncZ = (dims[2] - 1 < 1 ? 1 : dims[2] - 1); - int ncells = ncX*ncY*ncZ; - - useBinary = ub; - open_file(filename); - write_header(); - - write_string("DATASET STRUCTURED_GRID\n"); - sprintf(str, "DIMENSIONS %d %d %d\n", dims[0], dims[1], dims[2]); - write_string(str); - sprintf(str, "POINTS %d float\n", npts); - write_string(str); - for (i = 0 ; i < 3*npts ; i++) - { - write_float(pts[i]); - } - - write_variables(nvars, vardim, centering, varnames, vars, npts, ncells); - - close_file(); -} - - diff --git a/src/TNL/legacy/incompressible-navier-stokes/visit_writer.h b/src/TNL/legacy/incompressible-navier-stokes/visit_writer.h deleted file mode 100644 index ae67e3ccc8de624ccf7f027cddf223cc32cd476c..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/incompressible-navier-stokes/visit_writer.h +++ /dev/null @@ -1,271 +0,0 @@ -/* ************************************************************************* // -// visit_writer.h // -// ************************************************************************* */ - -/* -// This file contains function prototypes for writing out point meshes, -// unstructured meshes, rectilinear meshes, regular meshes, and -// structured/curvilinear meshes into files that can later be read by VisIt. -// -// Each routine assumes that the data being written is three-dimensional. 
-// If the data is two-dimensional, you must still write out the data -// as three-dimensional (ie pad arrays so that they are the correct size, etc). -// However: the VisIt reader will determine that the data is truly two- -// dimensional and visualize it as a two-dimensional dataset. -// -// All writers have an ASCII vs Binary decision. The tradeoffs are the -// standard ones: ASCII is human readable, but slow. The -// binary is much faster, but not human readable. Note: the binary format -// is portable, since it converts all data to be big-endian (this was a -// design decision for the format the visit_writer writes to -- the VTK -// format). -// -// If you have multiple grids, you can write out one file for each grid. -// There are potential pitfalls in doing this, where extra geometry and -// interpolation problems appear along grid boundaries. For additional -// help with this issue, e-mail visit-help@llnl.gov -*/ - - -/* **************************************************************************** -// Function: write_point_mesh -// -// Purpose: -// Writes out a point mesh. -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// npts The number of points in the mesh. -// pts The spatial locations of the points. This array should -// be size 3*npts. The points should be encoded as: -// <x1, y1, z1, x2, y2, z2, ..., xn, yn, zn> -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_point_mesh(const char *filename, int useBinary, int npts, - float *pts, int nvars, int *vardim, - const char * const *varnames, double **vars); - - - -/* **************************************************************************** -// Function: write_unstructured_mesh -// -// Purpose: -// Writes out a unstructured mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// npts The number of points in the mesh. -// pts The spatial locations of the points. This array should -// be size 3*npts. The points should be encoded as: -// <x1, y1, z1, x2, y2, z2, ..., xn, yn, zn> -// ncells The number of cells. -// celltypes The type of each cell. -// conn The connectivity array. -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. -// -// Example: -// You have two triangles. The first has points (0,0,0), (0,1,0), and -// (1,1,0). The second has points (0,0,0), (1,1,0), and (1,0,0). -// -// There are four unique points. -// -// float pts[12] = { 0,0,0, 0,1,0, 1,1,0, 1,0,0 }; -// -// It is important the points list contain only unique points, -// because VisIt is not able to correctly determine the connectivity of a -// dataset when points are duplicated. -// -// There are two triangles. 
-// int ncells = 2; -// -// The cells are both triangles. -// int celltypes[2] = { VISIT_TRIANGLE, VISIT_TRIANGLE }; -// -// The connectivity contains indices into the points list. The indexing -// assumes that each point has size 3 (x,y,z). -// -// int conn[6] = { 0, 1, 2, 0, 2, 3 }; -// -// Hint: -// When writing an unstructured mesh, it is easy to get the orientation -// of a cell backwards. VisIt typically does okay with this, but it -// can cause problems. To test if this is happening, bring up VisIt on -// your newly outputted dataset and make a Pseudocolor plot of -// "mesh_quality/volume" for 3D datasets or "mesh_quality/area" for 2D -// datasets. If the cells are inside-out, the volumes or areas will be -// negative. -// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -#define VISIT_VERTEX 1 -#define VISIT_LINE 3 -#define VISIT_TRIANGLE 5 -#define VISIT_QUAD 9 -#define VISIT_TETRA 10 -#define VISIT_HEXAHEDRON 12 -#define VISIT_WEDGE 13 -#define VISIT_PYRAMID 14 - -void write_unstructured_mesh(const char *filename, int useBinary, int npts, - float *pts, int ncells, int *celltypes, int *conn, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars); - - - -/* **************************************************************************** -// Function: write_regular_mesh -// -// Purpose: -// Writes out a regular mesh. A regular mesh is one where the data lies -// along regular intervals. "Brick of bytes/floats", -// "Block of bytes/floats", and MRI data all are examples of data that -// lie on regular meshes. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nX, nY, nZ }, where nX is the -// number of points in the X-dimension, etc. -// nvars The number of variables. 
-// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. -// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_regular_mesh(const char *filename, int useBinary, int *dims, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars); - - - - -/* **************************************************************************** -// Function: write_rectilinear_mesh -// -// Purpose: -// Writes out a rectilinear mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nX, nY, nZ }, where nX is the -// number of points in the X-dimension, etc. -// x An array of size dims[0] that contains the x-coordinates. -// y An array of size dims[1] that contains the x-coordinates. -// z An array of size dims[2] that contains the x-coordinates. -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. 
-// The size of vars[i] should be npts*vardim[i]. -// -// -// Example: -// You have a rectilinear mesh with x = { 0, 1, 2}, y = { 1, 1.5, 2, 3 }, -// and z = { 2.5, 3.5 }. -// -// Then dims = { 3, 4, 2 }. -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_rectilinear_mesh(const char *filename, int useBinary, - int *dims, float *x, float *y, float *z, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars); - - - - -/* **************************************************************************** -// Function: write_curvilinear_mesh -// -// Purpose: -// Writes out a curvilinear mesh. -// -// -// Arguments: -// filename The name of the file to write. If the extension ".vtk" is -// not present, it will be added. -// useBinary '0' to write ASCII, !0 to write binary -// dims An array of size 3 = { nI, nJ, nK }, where nI is the -// number of points in the logical I dimension, etc. -// pts An array of size nI*nJ*nK*3. The array should be layed -// out as (pt(i=0,j=0,k=0), pt(i=1,j=0,k=0), ... -// pt(i=nI-1,j=0,k=0), pt(i=0,j=1,k=0), ...). -// nvars The number of variables. -// vardim The dimension of each variable. The size of vardim should -// be nvars. If var i is a scalar, then vardim[i] = 1. -// If var i is a vector, then vardim[i] = 3. -// centering The centering of each variable. The size of centering -// should be nvars. If centering[i] == 0, then the variable -// is cell-based. If centering[i] != 0, then the variable -// is point-based. -// vars An array of variables. The size of vars should be nvars. -// The size of vars[i] should be npts*vardim[i]. 
-// -// -// Programmer: Hank Childs -// Creation: September 2, 2004 -// -// ***************************************************************************/ - -void write_curvilinear_mesh(const char *filename, int useBinary, - int *dims, float *pts, - int nvars, int *vardim, int *centering, - const char * const *varnames, double **vars); - - - diff --git a/src/TNL/legacy/mesh/tnlDistributedGrid.h b/src/TNL/legacy/mesh/tnlDistributedGrid.h deleted file mode 100644 index 6cdc286e9535c54e19a47e92cae58e286d6feaca..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/mesh/tnlDistributedGrid.h +++ /dev/null @@ -1,203 +0,0 @@ -/*************************************************************************** - tnlDistributedGrid.h - description - ------------------- - begin : Feb 26, 2011 - copyright : (C) 2011 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLDISTRIBUTEDGRID_H_ -#define TNLDISTRIBUTEDGRID_H_ - -#include <TNL/Object.h> -#include <TNL/tnlCommunicator.h> - -template< int Dimension, - typename GridType, - typename Device = Devices::Host, - typename Real = double, - typename Index = int > -class tnlDistributedGrid : public Object -{ - //! We do not allow constructor without parameters. - tnlDistributedGrid(); - - //! We do not allow copy constructor without object name. 
- tnlDistributedGrid( const tnlDistributedGrid< Dimension, Real, Device, Index >& a ); - - public: - - tnlDistributedGrid( const String& name ); - - bool init( tnlCommunicator* communicator, - const GridType& grid, - const StaticVector< Dimension, Index >& subdomainOverlaps ); - - tnlCommunicator< Device >* getCommunicator() const; - - const StaticVector< Dimension, Real >& getDomainLowerCorner() const; - - const StaticVector< Dimension, Real >& getDomainUpperCorner() const; - - const StaticVector< Dimension, Index >& getDimensions() const; - - const StaticVector< Dimension, int >& getGridDimensions() const; - - const StaticVector< Dimension, int >& getLowerNeighbors() const; - - const StaticVector< Dimension, Index >& getLowerSubdomainsOverlaps() const; - - const StaticVector< Dimension, int >& getNodeCoordinates() const; - - const StaticVector< Dimension, Index >& getSubdomainDimensions() const; - - const StaticVector< Dimension, Index >& getUpperSubdomainsOverlaps() const; - - const StaticVector< Dimension, int >& getUppperNeighbors() const; - - protected: - - //! Pointer to the communicator used by this distributed grid. - tnlCommunicator< Device >* communicator; - - //! In 2D this is the left bottom corner of the global domain. - /*!*** - * This is naturally generalized to more dimensions. - */ - StaticVector< Dimension, Real > domainLowerCorner; - - //! In 2D this is the right top corner of the global domain. - /*!*** - * This is naturally generalized to more dimensions. - */ - StaticVector< Dimension, Real > domainUpperCorner; - - //! Dimension of the global domain. - StaticVector< Dimension, Index > globalDimensions; - - //! Dimension of the local subdomain. - StaticVector< Dimension, Index > subdomainDimensions; - - //! Number of the distributed grid nodes along each dimension. - StaticVector< Dimension, int > gridDimensions; - - //! Coordinates of this node of the distributed grid. - StaticVector< Dimension, int > nodeCoordinates; - - //! 
Here are device IDs taken from the tnlCommunicator. - /*!*** - * In 2D, this is the device ID of the neighbor on the - * right and above. - */ - StaticVector< Dimension, int > uppperNeighbors; - - //! Here are device IDs taken from the tnlCommunicator. - /*!*** - * In 2D, this is the device ID of the neighbor on the - * left and below. - */ - StaticVector< Dimension, int > lowerNeighbors; - - //! Here are widths of overlaps at subdomain boundaries with neighbors. - /*!*** - * These overlaps are necessary for exchange of data - * between neighboring nodes. In 2D, here are overlaps - * with the neighbors on the right and above. - */ - StaticVector< Dimension, Index > upperSubdomainsOverlaps; - - //! Here are widths of overlaps at subdomain boundaries with neighbors. - /*!*** - * These overlaps are necessary for exchange of data - * between neighboring nodes. In 2D, here are overlaps - * with the neighbors on the left and below. - */ - StaticVector< Dimension, Index > lowerSubdomainsOverlaps; - -}; - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: tnlDistributedGrid( const String& name ) - : Object( name ) -{ - -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -bool tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: init( tnlCommunicator* communicator, - const GridType& grid, - const StaticVector< Dimension, int >& gridDimensions, - const StaticVector< Dimension, Index >& subdomainOverlaps ) -{ - -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -tnlCommunicator* tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getCommunicator() const -{ - return communicator; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, Real >& tnlDistributedGrid< Dimension, 
GridType, Device, Real, Index > :: getDomainLowerCorner() const -{ - return domainLowerCorner; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, Real >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getDomainUpperCorner() const -{ - return domainUpperCorner; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, Index >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getDimensions() const -{ - return globalDimensions; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, int >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getGridDimensions() const -{ - return gridDimensions; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, int >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getLowerNeighbors() const -{ - return lowerNeighbors; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, Index >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getLowerSubdomainsOverlaps() const -{ - return lowerSubdomainsOverlaps; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, int >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getNodeCoordinates() const -{ - return nodeCoordinates; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, Index >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getSubdomainDimensions() const -{ - return subdomainDimensions; -} - -template< int Dimension, typename GridType, 
typename Device, typename Real, typename Index > -const StaticVector< Dimension, Index >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getUpperSubdomainsOverlaps() const -{ - return upperSubdomainsOverlaps; -} - -template< int Dimension, typename GridType, typename Device, typename Real, typename Index > -const StaticVector< Dimension, int >& tnlDistributedGrid< Dimension, GridType, Device, Real, Index > :: getUppperNeighbors() const -{ - return uppperNeighbors; -} - -#endif /* TNLDISTRIBUTEDGRID_H_ */ diff --git a/src/TNL/legacy/tnl-benchmarks.cpp b/src/TNL/legacy/tnl-benchmarks.cpp deleted file mode 100644 index d62fd60f1e858a232b8d61fdf4b441856172ee21..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/tnl-benchmarks.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/*************************************************************************** - tnl-benchmarks.cpp - description - ------------------- - begin : Nov 25, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#include <TNL/TimerRT.h> -#include <TNL/Containers/Vector.h> -#include <TNL/Containers/VectorCUDA.h> -#include <TNL/tnl-cuda-kernels.cu.h> -#include <TNL/tnl-benchmarks.h> - - -int main( int argc, char* argv[] ) -{ - std::cout << "Benchmarking memory bandwidth when transfering int ..." << std::endl; - - const int size = 1 << 22; - double host_to_host_band_width; - double host_to_device_band_width; - double device_to_host_band_width; - double device_to_device_band_width; - - transferBenchmark< int >( size, - host_to_host_band_width, - host_to_device_band_width, - device_to_host_band_width, - device_to_device_band_width ); - - - std::cout << "Benchmarking reduction of int ..." << std::endl; - for( int i = 0; i <= 6; i ++ ) - reductionBenchmark< int >( size, i ); - - std::cout << "Benchmarking reduction of float ..." 
<< std::endl; - for( int i = 0; i <= 6; i ++ ) - reductionBenchmark< float >( size, i ); - - std::cout << "Benchmarking reduction of double ..." << std::endl; - for( int i = 0; i <= 6; i ++ ) - reductionBenchmark< double >( size / 2, i ); - - return EXIT_SUCCESS; -} diff --git a/src/TNL/legacy/tnl-benchmarks.h b/src/TNL/legacy/tnl-benchmarks.h deleted file mode 100644 index df9af1cf43231d36e5460cd410b3c7756529ada9..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/tnl-benchmarks.h +++ /dev/null @@ -1,239 +0,0 @@ -/*************************************************************************** - tnl-benchmarks.h - description - ------------------- - begin : Jan 27, 2010 - copyright : (C) 2010 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#ifndef TNLBENCHMARKS_H_ -#define TNLBENCHMARKS_H_ - -#include <TNL/Math.h> - -template< class T > -bool transferBenchmark( const int size, - double& host_to_host_band_width, - double& host_to_device_band_width, - double& device_to_host_band_width, - double& device_to_device_band_width ) -{ - - Vector< T > host_vector( "transferBenchmark:host-vector", size ); - Vector< T > host_vector2( "transferBenchmark:host-vector-2", size ); - VectorCUDA< T > device_vector( "transferBenchmark:device-vector", size ); - VectorCUDA< T > device_vector2( "transferBenchmark:device-vector-2", size ); - - for( int i = 0; i < size; i ++ ) - host_vector[ i ] = i + 1; - - const long int cycles = 100; - long int bytes = cycles * size * sizeof( int ); - long int mega_byte = 1 << 20; - - TimerRT timer; - timer. Reset(); - for( int i = 0; i < cycles; i ++ ) - if( ! host_vector2. copyFrom( host_vector ) ) - return false; - double time = timer. 
getTime(); - double giga_byte = ( double ) ( 1 << 30 ); - host_to_host_band_width = bytes / giga_byte / time; - - std::cout << "Transfering " << bytes / mega_byte << " MB from HOST to HOST took " << time << " seconds. Bandwidth is " << host_to_host_band_width << " GB/s." << std::endl; - - timer. Reset(); - for( int i = 0; i < cycles; i ++ ) - if( ! device_vector. copyFrom( host_vector ) ) - return false; - time = timer. getTime(); - host_to_device_band_width = bytes / giga_byte / time; - - std::cout << "Transfering " << bytes / mega_byte << " MB from HOST to DEVICE took " << time << " seconds. Bandwidth is " << host_to_device_band_width << " GB/s." << std::endl; - - timer. Reset(); - for( int i = 0; i < cycles; i ++ ) - if( ! host_vector2. copyFrom( device_vector ) ) - return false; - time = timer. getTime(); - device_to_host_band_width = bytes / giga_byte / time; - - std::cout << "Transfering " << bytes / mega_byte << " MB from DEVICE to HOST took " << time << " seconds. Bandwidth is " << device_to_host_band_width << " GB/s." << std::endl; - - timer. Reset(); - for( int i = 0; i < cycles; i ++ ) - if( ! device_vector2. copyFrom( device_vector ) ) - return false; - - time = timer. getTime(); - - // Since we read and write tha data back we process twice as many bytes. - bytes *= 2; - device_to_device_band_width = bytes / giga_byte / time; - - std::cout << "Transfering " << bytes / mega_byte << " MB from DEVICE to DEVICE took " << time << " seconds. Bandwidth is " << device_to_device_band_width << " GB/s." << std::endl; -} - -template< class T > -void tnlCPUReductionSum( const Vector< T >& host_vector, - T& sum ) -{ - const T* data = host_vector. Data(); - const int size = host_vector. GetSize(); - sum = 0.0; - for( int i = 0; i < size; i ++ ) - sum += data[ i ]; -}; - -template< class T > -void tnlCPUReductionMin( const Vector< T >& host_vector, - T& min ) -{ - const T* data = host_vector. Data(); - const int size = host_vector. 
GetSize(); - //TNL_ASSERT( data ); - min = data[ 0 ]; - for( int i = 1; i < size; i ++ ) - min = :: min( min, data[ i ] ); -}; - -template< class T > -void tnlCPUReductionMax( const Vector< T >& host_vector, - T& max ) -{ - const T* data = host_vector. Data(); - const int size = host_vector. GetSize(); - //TNL_ASSERT( data ); - max = data[ 0 ]; - for( int i = 1; i < size; i ++ ) - max = :: max( max, data[ i ] ); -}; - -template< class T > -void reductionBenchmark( const int size, - const int algorithm ) -{ - Vector< T > host_vector( "reductionBenchmark:host-vector", size ); - VectorCUDA< T > device_vector( "reductionBenchmark:device-vector", size ); - VectorCUDA< T > device_aux( "reductionBenchmark:device-aux", size / 2 ); - - for( int i = 0; i < size; i ++ ) - host_vector[ i ] = i + 1; - - device_vector. copyFrom( host_vector ); - - T sum, min, max; - const long int reducing_cycles( 10 ); - - TimerRT timer; - timer. Reset(); - for( int i = 0; i < reducing_cycles; i ++ ) - { - switch( algorithm ) - { - case 0: // reduction on CPU - tnlCPUReductionSum( host_vector, sum ); - tnlCPUReductionMin( host_vector, sum ); - tnlCPUReductionMax( host_vector, sum ); - case 1: - Devices::CudaSimpleReduction1Sum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - Devices::CudaSimpleReduction1Min( size, - device_vector. Data(), - min, - device_aux. Data() ); - Devices::CudaSimpleReduction1Max( size, - device_vector. Data(), - max, - device_aux. Data() ); - break; - case 2: - Devices::CudaSimpleReduction2Sum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - Devices::CudaSimpleReduction2Min( size, - device_vector. Data(), - min, - device_aux. Data() ); - Devices::CudaSimpleReduction2Max( size, - device_vector. Data(), - max, - device_aux. Data() ); - break; - case 3: - Devices::CudaSimpleReduction3Sum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - Devices::CudaSimpleReduction3Min( size, - device_vector. Data(), - min, - device_aux. 
Data() ); - Devices::CudaSimpleReduction3Max( size, - device_vector. Data(), - max, - device_aux. Data() ); - break; - case 4: - Devices::CudaSimpleReduction4Sum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - Devices::CudaSimpleReduction4Min( size, - device_vector. Data(), - min, - device_aux. Data() ); - Devices::CudaSimpleReduction4Max( size, - device_vector. Data(), - max, - device_aux. Data() ); - break; - case 5: - Devices::CudaSimpleReduction5Sum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - Devices::CudaSimpleReduction5Min( size, - device_vector. Data(), - min, - device_aux. Data() ); - Devices::CudaSimpleReduction5Max( size, - device_vector. Data(), - max, - device_aux. Data() ); - break; - default: - CudaReductionSum( size, - device_vector. Data(), - sum, - device_aux. Data() ); - CudaReductionMin( size, - device_vector. Data(), - min, - device_aux. Data() ); - CudaReductionMax( size, - device_vector. Data(), - max, - device_aux. Data() ); - - } - } - const double time = timer. getTime(); - double giga_byte = ( double ) ( 1 << 30 ); - long int mega_byte = 1 << 20; - long int bytes_reduced = size * sizeof( T ) * reducing_cycles * 3; - const double reduction_band_width = bytes_reduced / giga_byte / time; - - std::cout << "Reducing " << bytes_reduced / mega_byte - << " MB on DEVICE using algorithm " << algorithm - << " took " << time - << " seconds. Bandwidth is " << reduction_band_width - << " GB/s." 
<< std::endl; -} - -#endif /* TNLBENCHMARKS_H_ */ diff --git a/src/TNL/legacy/tnlMatrix_impl.h b/src/TNL/legacy/tnlMatrix_impl.h deleted file mode 100644 index 558f18ab16fba204b2bd9f118617bdae74ce60e9..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/tnlMatrix_impl.h +++ /dev/null @@ -1,359 +0,0 @@ -/*************************************************************************** - tnlMatrix_impl.h - description - ------------------- - begin : Dec 18, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - ***************************************************************************/ - -#ifndef TNLMATRIX_IMPL_H_ -#define TNLMATRIX_IMPL_H_ - -#include <matrices/tnlMatrix.h> -#include <core/TNL_ASSERT.h> - -template< typename Real, - typename Device, - typename Index > -tnlMatrix< Real, Device, Index >::tnlMatrix() -: rows( 0 ), - columns( 0 ), - numberOfColors( 0 ) -{ -} - -template< typename Real, - typename Device, - typename Index > - bool tnlMatrix< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) -{ - TNL_ASSERT( rows > 0 && columns > 0, - std::cerr << " rows = " << rows << " columns = " << columns ); - this->rows = rows; - this->columns = columns; - return true; -} - -template< typename Real, - typename Device, - typename Index > -void tnlMatrix< Real, Device, Index >::getRowLengths( Containers::Vector< IndexType, DeviceType, IndexType >& rowLengths ) const -{ - rowLengths.setSize( this->getRows() ); - for( IndexType row = 0; row < this->getRows(); row++ ) - 
rowLengths.setElement( row, this->getRowLength( row ) ); -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, - typename Device2, - typename Index2 > -bool tnlMatrix< Real, Device, Index >::setLike( const tnlMatrix< Real2, Device2, Index2 >& matrix ) -{ - return setDimensions( matrix.getRows(), matrix.getColumns() ); -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Index tnlMatrix< Real, Device, Index >::getRows() const -{ - return this->rows; -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Index tnlMatrix< Real, Device, Index >::getColumns() const -{ - return this->columns; -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA -__device__ __host__ -#endif -Index tnlMatrix< Real, Device, Index >::getNumberOfColors() const -{ - return this->numberOfColors; -} - -template< typename Real, - typename Device, - typename Index > -void tnlMatrix< Real, Device, Index >::reset() -{ - this->rows = 0; - this->columns = 0; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Matrix > -bool tnlMatrix< Real, Device, Index >::copyFrom( const Matrix& matrix, - const CompressedRowLengthsVector& rowLengths ) -{ - /*tnlStaticAssert( DeviceType::DeviceType == Devices::HostDevice, ); - tnlStaticAssert( DeviceType::DeviceType == Matrix:DeviceType::DeviceType, );*/ - - this->setLike( matrix ); - if( ! this->setCompressedRowLengths( rowLengths ) ) - return false; - Containers::Vector< RealType, Devices::Host, IndexType > values; - Containers::Vector< IndexType, Devices::Host, IndexType > columns; - if( ! values.setSize( this->getColumns() ) || - ! 
columns.setSize( this->getColumns() ) ) - return false; - for( IndexType row = 0; row < this->getRows(); row++ ) - { - matrix.getRow( row, columns.getData(), values.getData() ); - this->setRow( row, columns.getData(), values.getData(), rowLengths.getElement( row ) ); - } - return true; -} - -template< typename Real, - typename Device, - typename Index > -tnlMatrix< Real, Device, Index >& tnlMatrix< Real, Device, Index >::operator = ( const tnlMatrix< RealType, DeviceType, IndexType >& m ) -{ - this->setLike( m ); - - Containers::Vector< IndexType, DeviceType, IndexType > rowLengths; - m.getRowLengths( rowLengths ); - this->setCompressedRowLengths( rowLengths ); - - Containers::Vector< RealType, DeviceType, IndexType > rowValues; - Containers::Vector< IndexType, DeviceType, IndexType > rowColumns; - const IndexType maxRowLength = rowLengths.max(); - rowValues.setSize( maxRowLength ); - rowColumns.setSize( maxRowLength ); - for( IndexType row = 0; row < this->getRows(); row++ ) - { - m.getRow( row, - rowColumns.getData(), - rowValues.getData() ); - this->setRow( row, - rowColumns.getData(), - rowValues.getData(), - m.getRowLength( row ) ); - } -} - -template< typename Real, - typename Device, - typename Index > - template< typename Matrix > -bool tnlMatrix< Real, Device, Index >::operator == ( const Matrix& matrix ) const -{ - if( this->getRows() != matrix.getRows() || - this->getColumns() != matrix.getColumns() ) - return false; - for( IndexType row = 0; row < this->getRows(); row++ ) - for( IndexType column = 0; column < this->getColumns(); column++ ) - if( this->getElement( row, column ) != matrix.getElement( row, column ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Matrix > -bool tnlMatrix< Real, Device, Index >::operator != ( const Matrix& matrix ) const -{ - return ! 
operator == ( matrix ); -} - -template< typename Real, - typename Device, - typename Index > -bool tnlMatrix< Real, Device, Index >::save( File& file ) const -{ - if( ! tnlObject::save( file ) || - ! file.write( &this->rows ) || - ! file.write( &this->columns ) || - ! this->values.save( file ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool tnlMatrix< Real, Device, Index >::load( File& file ) -{ - if( ! tnlObject::load( file ) || - ! file.read( &this->rows ) || - ! file.read( &this->columns ) || - ! this->values.load( file ) ) - return false; - return true; -} - -/* -template< typename Real, - typename Device, - typename Index > -void tnlMatrix< Real, Device, Index >::computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) -{ - this->numberOfColors = 0; - - for( IndexType i = this->getRows() - 1; i >= 0; i-- ) - { - // init color array - Containers::Vector< Index, Device, Index > usedColors; - usedColors.setSize( this->numberOfColors ); - for( IndexType j = 0; j < this->numberOfColors; j++ ) - usedColors.setElement( j, 0 ); - - // find all colors used in given row - - } -} - */ - -template< typename Real, - typename Device, - typename Index > -void tnlMatrix< Real, Device, Index >::print( ostream& str ) const -{ -} - -template< typename Real, - typename Device, - typename Index > -bool tnlMatrix< Real, Device, Index >::help( bool verbose ) -{ - return true; -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -void tnlMatrix< Real, Device, Index >::copyFromHostToCuda( tnlMatrix< Real, Devices::Host, Index >& matrix ) -{ - this->numberOfColors = matrix.getNumberOfColors(); - this->columns = matrix.getColumns(); - this->rows = matrix.getRows(); - - this->values.setSize( matrix.getValuesSize() ); -} - -template< typename Real, - typename Device, - typename Index > -Index tnlMatrix< Real, Device, Index >::getValuesSize() const -{ - return 
this->values.getSize(); -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -void tnlMatrix< Real, Device, Index >::computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) -{ - for( IndexType i = this->getRows() - 1; i >= 0; i-- ) - { - // init color array - Containers::Vector< Index, Device, Index > usedColors; - usedColors.setSize( this->numberOfColors ); - for( IndexType j = 0; j < this->numberOfColors; j++ ) - usedColors.setElement( j, 0 ); - - // find all colors used in given row - for( IndexType j = i + 1; j < this->getColumns(); j++ ) - if( this->getElement( i, j ) != 0.0 ) - usedColors.setElement( colorsVector.getElement( j ), 1 ); - - // find unused color - bool found = false; - for( IndexType j = 0; j < this->numberOfColors; j++ ) - if( usedColors.getElement( j ) == 0 ) - { - colorsVector.setElement( i, j ); - found = true; - break; - } - if( !found ) - { - colorsVector.setElement( i, this->numberOfColors ); - this->numberOfColors++; - } - } -} - -#ifdef HAVE_CUDA -template< typename Matrix, - typename InVector, - typename OutVector > -__global__ void tnlMatrixVectorProductCudaKernel( const Matrix* matrix, - const InVector* inVector, - OutVector* outVector, - int gridIdx ) -{ - tnlStaticAssert( Matrix::DeviceType::DeviceType == tnlCudaDevice, ); - const typename Matrix::IndexType rowIdx = ( gridIdx * tnlCuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < matrix->getRows() ) - ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); -} -#endif - -template< typename Matrix, - typename InVector, - typename OutVector > -void tnlMatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = tnlCuda::passToDevice( matrix ); - InVector* kernel_inVector = tnlCuda::passToDevice( inVector ); - OutVector* 
kernel_outVector = tnlCuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % tnlCuda::getMaxGridSize(); - tnlMatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_this, - kernel_inVector, - kernel_outVector, - gridIdx ); - } - tnlCuda::freeFromDevice( kernel_this ); - tnlCuda::freeFromDevice( kernel_inVector ); - tnlCuda::freeFromDevice( kernel_outVector ); - TNL_CHECK_CUDA_DEVICE; -#endif -} - -#endif /* TNLMATRIX_IMPL_H_ */ diff --git a/src/TNL/legacy/tnlSparseMatrix_impl.h b/src/TNL/legacy/tnlSparseMatrix_impl.h deleted file mode 100644 index 5b4b3ddc1c53878c4a73f3eaadb6d63698e0a82e..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/tnlSparseMatrix_impl.h +++ /dev/null @@ -1,169 +0,0 @@ -/*************************************************************************** - SparseMatrix_impl.h - description - ------------------- - begin : Dec 21, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/*************************************************************************** - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - ***************************************************************************/ - -#ifndef SparseMATRIX_IMPL_H_ -#define SparseMATRIX_IMPL_H_ - -template< typename Real, - typename Device, - typename Index > -SparseMatrix< Real, Device, Index >::SparseMatrix() -: maxRowLength( 0 ) -{ -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, - typename Device2, - typename Index2 > -bool SparseMatrix< Real, Device, Index >::setLike( const SparseMatrix< Real2, Device2, Index2 >& matrix ) -{ - if( ! tnlMatrix< Real, Device, Index >::setLike( matrix ) || - ! this->allocateMatrixElements( matrix.getNumberOfMatrixElements() ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > -Index SparseMatrix< Real, Device, Index >::getNumberOfMatrixElements() const -{ - return this->values.getSize(); -} - -template< typename Real, - typename Device, - typename Index > -Index SparseMatrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const -{ - IndexType nonzeroElements( 0 ); - for( IndexType i = 0; i < this->values.getSize(); i++ ) - if( this->columnIndexes.getElement( i ) != this-> columns && - this->values.getElement( i ) != 0.0 ) - nonzeroElements++; - return nonzeroElements; -} - -template< typename Real, - typename Device, - typename Index > -Index -SparseMatrix< Real, Device, Index >:: -getMaxRowLength() const -{ - return this->maxRowLength; -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Index SparseMatrix< Real, Device, Index >::getPaddingIndex() const -{ - return this->getColumns(); -} - -template< typename Real, - typename Device, - typename Index > -void SparseMatrix< Real, Device, Index >::reset() -{ - tnlMatrix< Real, Device, Index >::reset(); - this->values.reset(); - this->columnIndexes.reset(); -} - -template< typename Real, - typename Device, - typename Index > -bool SparseMatrix< Real, 
Device, Index >::save( File& file ) const -{ - if( ! tnlMatrix< Real, Device, Index >::save( file ) || - ! this->values.save( file ) || - ! this->columnIndexes.save( file ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool SparseMatrix< Real, Device, Index >::load( File& file ) -{ - if( ! tnlMatrix< Real, Device, Index >::load( file ) || - ! this->values.load( file ) || - ! this->columnIndexes.load( file ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -Containers::Vector< Index, Device, Index > SparseMatrix< Real, Device, Index >::getColumnIndexes() -{ - return this->columnIndexes; -} - -template< typename Real, - typename Device, - typename Index > -#ifdef HAVE_CUDA - __device__ __host__ -#endif -void SparseMatrix< Real, Device, Index >::copyFromHostToCuda( SparseMatrix< Real, Devices::Host, Index >& matrix ) -{ - tnlMatrix< Real, Device, Index >::copyFromHostToCuda( matrix ); - - this->columnIndexes.setSize( matrix.getValuesSize() ); - this->columnIndexes.setValue( this->getPaddingIndex() ); - this->maxRowLength = matrix.getMaxRowLength(); -} - -template< typename Real, - typename Device, - typename Index > -bool SparseMatrix< Real, Device, Index >::allocateMatrixElements( const IndexType& numberOfMatrixElements ) -{ - if( ! this->values.setSize( numberOfMatrixElements ) || - ! this->columnIndexes.setSize( numberOfMatrixElements ) ) - return false; - - /**** - * Setting a column index to this->columns means that the - * index is undefined. 
- */ - this->columnIndexes.setValue( this->columns ); - return true; -} - -template< typename Real, - typename Device, - typename Index > -void SparseMatrix< Real, Device, Index >::printStructure( ostream& str ) const -{ -} - -#endif /* SparseMATRIX_IMPL_H_ */ diff --git a/src/TNL/legacy/vdb/TODO b/src/TNL/legacy/vdb/TODO deleted file mode 100755 index 17eb48a8144cd72046b207e49ea4d79dc757fb83..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/TODO +++ /dev/null @@ -1,7 +0,0 @@ -echo -echo -echo "Add integration to tnl library -- that means: templates for" -echo "Device types. Add updateTree method. Add Values class to store " -echo "interpoled values in each node. Compare speed with original" -echo "VDB. Implement on CUDA. Implement 3D version (should be trivial)." -echo diff --git a/src/TNL/legacy/vdb/draw.py b/src/TNL/legacy/vdb/draw.py deleted file mode 100644 index 4df7253b5934595266c9bd440678b1e2067e7daf..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/draw.py +++ /dev/null @@ -1,69 +0,0 @@ -import re -from pyx import * - -filename = "nodesLevel_" -depth = 5 -colors = [ - color.cmyk.Yellow, - color.rgb.green, - color.rgb.blue, - color.rgb.red, - color.rgb.black -] -c = canvas.canvas() -for i in range(depth): - with open(filename + str(i), 'r') as f: - lines = f.readlines() - getnumbers = re.compile(r"\d+") - aux = getnumbers.findall(lines[0]) - region = {"x1": int(aux[0]), - "x2": int(aux[1]), - "y1": int(aux[2]), - "y2": int(aux[3]), - "level": int(aux[4]) - } - aux = getnumbers.findall(lines[1]) - splitting = {"splitx": int(aux[0]), - "splity": int(aux[1]), - "logx": int(aux[2]), - "logy": int(aux[3]) - } - states = [] - for j in range(3, len(lines)): - aux = getnumbers.findall(lines[j]) - states.append( - {"x": int(aux[0]), - "y": int(aux[1]), - "state": int(aux[2])} - ) - lengthx = region.get("x2") - region.get("x1") - rectsx = splitting.get("splitx") * \ - (splitting.get("logx") ** region.get("level")) - stepx = lengthx / 
rectsx - lengthy = region.get("y2") - region.get("y1") - rectsy = splitting.get("splity") * \ - (splitting.get("logy") ** region.get("level")) - stepy = lengthy / rectsy - print(str(stepx)) - print(str(stepy)) - for state in states: - if state.get("state") and i < depth - 1: - c.stroke(path.rect(state.get("x") * stepx, - state.get("y") * stepy, - stepx, - stepy), - [deco.filled([colors[i]])]) - elif i == 0: - c.stroke(path.rect(state.get("x") * stepx, - state.get("y") * stepy, - stepx, - stepy), - [deco.filled([color.rgb.white])]) - elif i == depth - 1: - c.fill(path.rect(state.get("x") * stepx, - state.get("y") * stepy, - stepx, - stepy), - [deco.filled([color.rgb.black])]) -c.writePDFfile(filename) - diff --git a/src/TNL/legacy/vdb/make b/src/TNL/legacy/vdb/make deleted file mode 100755 index a169859cce3df7d3d82034e026e9ebb2e582247c..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/make +++ /dev/null @@ -1 +0,0 @@ -g++ -O1 -g -Wall -std=c++11 -Werror tnlInternalNode.h tnlInternalNode_impl.h tnlLeafNode.h tnlLeafNode_impl.h tnlNode.h tnlNode_impl.h tnlVDBMath.h tnlRootNode_test.cpp tnlRootNode.h tnlRootNode_impl.h tnlArea2D.h tnlArea2D_impl.h tnlCircle2D.h tnlCircle2D_impl.h tnlBitmaskArray.h tnlBitmaskArray_impl.h tnlBitmask.h tnlBitmask_impl.h -o test diff --git a/src/TNL/legacy/vdb/tnlArea2D.h b/src/TNL/legacy/vdb/tnlArea2D.h deleted file mode 100644 index 70f0416076dab21378b291829b4b96c4f5b670cf..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlArea2D.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _TNLAREA2D_H_INCLUDED_ -#define _TNLAREA2D_H_INCLUDED_ - -template< typename Real > -class tnlArea2D -{ -public: - tnlArea2D( Real startX, - Real endX, - Real startY, - Real endY ); - - Real getStartX(); - - Real getEndX(); - - Real getLengthX(); - - Real getStartY(); - - Real getEndY(); - - Real getLengthY(); - - ~tnlArea2D(){}; - -private: - Real startX; - Real endX; - Real startY; - Real endY; -}; - -#include "tnlArea2D_impl.h" 
-#endif // _TNLAREA2D_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlArea2D_impl.h b/src/TNL/legacy/vdb/tnlArea2D_impl.h deleted file mode 100644 index 7a7c2e7fa885ecf3d96e02eba54ff4cd6e5722c7..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlArea2D_impl.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _TNLAREA2D_IMPL_H_INCLUDED_ -#define _TNLAREA2D_IMPL_H_INCLUDED_ - -#include "tnlArea2D.h" - -template< typename Real > -tnlArea2D< Real >::tnlArea2D( Real startX, - Real endX, - Real startY, - Real endY ) -{ - this->startX = startX; - this->endX = endX; - this->startY = startY; - this->endY = endY; -} - -template< typename Real > -Real tnlArea2D< Real >::getStartX() -{ - return this->startX; -} - -template< typename Real > -Real tnlArea2D< Real >::getEndX() -{ - return this->endX; -} - -template< typename Real > -Real tnlArea2D< Real >::getLengthX() -{ - return this->endX - this->startX; -} - -template< typename Real > -Real tnlArea2D< Real >::getStartY() -{ - return this->startY; -} - -template< typename Real > -Real tnlArea2D< Real >::getEndY() -{ - return this->endY; -} - -template< typename Real > -Real tnlArea2D< Real >::getLengthY() -{ - return this->endY - this->startY; -} - -#endif // _TNLAREA2D_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlBitmask.h b/src/TNL/legacy/vdb/tnlBitmask.h deleted file mode 100644 index 0c69362ec7d5dc5a4454b85c2564164243843f09..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlBitmask.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _TNLBITMASK_H_INCLUDED_ -#define _TNLBITMASK_H_INCLUDED_ - -#include <cstdint> - -class tnlBitmask -{ -public: - tnlBitmask( bool state, unsigned x, unsigned y ); - - tnlBitmask( tnlBitmask* bitmask ); - - bool getState(); - - unsigned getX(); - - unsigned getY(); - - uint64_t getBitmask(); - - ~tnlBitmask(){}; - -private: - uint64_t bitmask; -}; - -#include "tnlBitmask_impl.h" -#endif //_TNLBITMASK_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlBitmaskArray.h 
b/src/TNL/legacy/vdb/tnlBitmaskArray.h deleted file mode 100644 index c54e22b0ce9e7f97c8e08c41ba48a74f980d94b2..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlBitmaskArray.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _TNLBITMASKARRAY_H_INCLUDED_ -#define _TNLBITMASKARRAY_H_INCLUDED_ - -#include "tnlBitmask.h" - -template< unsigned Size > -class tnlBitmaskArray -{ -public: - tnlBitmaskArray(); - - unsigned getSize(); - - void setIthBitmask( unsigned i, - tnlBitmask bitmask ); - - tnlBitmask* getIthBitmask( unsigned i ); - - ~tnlBitmaskArray(); - -private: - tnlBitmask* bitmaskArray[ Size ]; - unsigned length; -}; - -#include "tnlBitmaskArray_impl.h" -#endif // _TNLBITMASKARRAY_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlBitmaskArray_impl.h b/src/TNL/legacy/vdb/tnlBitmaskArray_impl.h deleted file mode 100644 index 829e184ae36d7666281d26bbd6b58107e75473c4..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlBitmaskArray_impl.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _TNLBITMASKARRAY_IMPL_H_INCLUDED_ -#define _TNLBITMASKARRAY_IMPL_H_INCLUDED_ - -#include <cassert> -#include "tnlBitmask.h" -#include "tnlBitmaskArray.h" - -template< unsigned Size > -tnlBitmaskArray< Size >::tnlBitmaskArray() -{ - this->length = Size; -} - -template< unsigned Size > -unsigned tnlBitmaskArray< Size >::getSize() -{ - return this->length; -} - -template< unsigned Size > -void tnlBitmaskArray< Size >::setIthBitmask( unsigned i, - tnlBitmask bitmask ) -{ - assert( i < Size ); - this->bitmaskArray[ i ] = new tnlBitmask( bitmask ); -} - -template< unsigned Size > -tnlBitmask* tnlBitmaskArray< Size >::getIthBitmask( unsigned i ) -{ - assert( i < Size ); - return this->bitmaskArray[ i ]; -} - -template< unsigned Size > -tnlBitmaskArray< Size >::~tnlBitmaskArray() -{ - for( int i = 0; i < this->length; i++ ) - delete this->bitmaskArray[ i ]; - delete this->bitmaskArray; -} - -#endif // _TNLBITMASKARRAY_IMPL_H_INCLUDED_ diff --git 
a/src/TNL/legacy/vdb/tnlBitmask_impl.h b/src/TNL/legacy/vdb/tnlBitmask_impl.h deleted file mode 100644 index 7deea649abdb9d9509a0cc89917327856f1385ff..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlBitmask_impl.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _TNLBITMASK_IMPL_H_INCLUDED_ -#define _TNLBITMASK_IMPL_H_INCLUDED_ - -#include <iostream> -#include <cstdint> -#include "tnlBitmask.h" - -using namespace std; - -tnlBitmask::tnlBitmask( bool state, - unsigned x, - unsigned y ) -/* - variables x and y have at most 30 active bits -*/ -{ - uint64_t state64 = state; - uint64_t x64 = x; - x64 <<= 4; - uint64_t y64 = y; - y64 <<= 34; - this->bitmask = x64 | y64 | state64; -} - -tnlBitmask::tnlBitmask( tnlBitmask* bitmask ) -{ - this->bitmask = bitmask->getBitmask(); -} - -bool tnlBitmask::getState() -{ - return this->bitmask & 1; -} - -unsigned tnlBitmask::getX() -{ - unsigned mask = 3 << 30; - unsigned x = this->bitmask >> 4; - return ( unsigned ) ( x & ( ~mask ) ); -} - -unsigned tnlBitmask::getY() -{ - unsigned mask = 3 << 30; - uint64_t y = this->bitmask >> 34; - return ( unsigned ) ( y & ( ~mask ) ); -} - -uint64_t tnlBitmask::getBitmask() -{ - return this->bitmask; -} - -#endif //_TNLBITMASK_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlCircle2D.h b/src/TNL/legacy/vdb/tnlCircle2D.h deleted file mode 100644 index d40393f7941d4af0e2abcffc092449b55bee6a3c..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlCircle2D.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _TNLCIRCLE2D_H_INCLUDED_ -#define _TNLCIRCLE2D_H_INCLUDED_ - -template< typename Real > -class tnlCircle2D -{ -public: - tnlCircle2D( unsigned a, - unsigned b, - unsigned r ); - - bool isIntercept( Real x1, - Real x2, - Real y1, - Real y2, - bool verbose = false ); - - bool isInInterval( Real x1, - Real x2, - Real x ); - - ~tnlCircle2D(); - -private: - // x and y define center of the circle - // r defines its radius - unsigned a; - unsigned b; - unsigned r; -}; - -#include 
"tnlCircle2D_impl.h" -#endif // _TNLCIRCLE2D_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlCircle2D_impl.h b/src/TNL/legacy/vdb/tnlCircle2D_impl.h deleted file mode 100644 index a0871726d89914ed179acd2a635c3448395a7fca..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlCircle2D_impl.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef _TNLCIRCLE2D_IMPL_H_INCLUDED_ -#define _TNLCIRCLE2D_IMPL_H_INCLUDED_ - -#include <iostream> -#include <cmath> -#include "tnlCircle2D.h" - -template< typename Real > -tnlCircle2D< Real >::tnlCircle2D( unsigned a, - unsigned b, - unsigned r ) -{ - this->a = a; - this->b = b; - this->r = r; -} - -template< typename Real > -bool tnlCircle2D< Real >::isIntercept( Real x1, - Real x2, - Real y1, - Real y2, - bool verbose ) -{ - if( this->isInInterval( x1, x2, this->a - this->r ) && - this->isInInterval( x1, x2, this->a + this->r ) && - this->isInInterval( y1, y2, this->b - this->r ) && - this->isInInterval( y1, y2, this->b + this->r ) ) - { - if( verbose ) - std::cout << "Circle is inside area." << std::endl; - return true; - } - else if( verbose ) - std::cout << "Circle is not inside area." << std::endl; - - Real R = this->r * this->r; - - Real aux = x1 - this->a; - if( R - aux * aux >= 0 && - ( this->isInInterval( y1, y2, sqrt( R - aux * aux ) + this->b ) || - this->isInInterval( y1, y2, -sqrt( R - aux * aux ) + this->b ) ) ) - { - if( verbose ) - std::cout << "Circle intercepts left boundry of area." << std::endl; - return true; - } - - aux = x2 - this->a; - if( R - aux * aux >= 0 && - ( this->isInInterval( y1, y2, sqrt( R - aux * aux ) + this->b ) || - this->isInInterval( y1, y2, -sqrt( R - aux * aux ) + this->b ) ) ) - { - if( verbose ) - std::cout << "Circle intercepts right boundry of area." 
<< std::endl; - return true; - } - - aux = y1 - this->b; - if( R - aux * aux >= 0 && - ( this->isInInterval( x1, x2, sqrt( R - aux * aux ) + this->a ) || - this->isInInterval( x1, x2, -sqrt( R - aux * aux ) + this->a ) ) ) - { - if( verbose ) - std::cout << "Circle intercepts bottom boundry of area." << std::endl; - return true; - } - - aux = y2 - this->b; - if( R - aux * aux >= 0 && - ( this->isInInterval( x1, x2, sqrt( R - aux * aux ) + this->a ) || - this->isInInterval( x1, x2, sqrt( R - aux * aux ) + this->a ) ) ) - { - if( verbose ) - std::cout << "Circle intercepts top boundry of area." << std::endl; - return true; - } - - if( verbose ) - std::cout << "Circle does not intercept area." << std::endl; - - return false; -} - -template< typename Real > -bool tnlCircle2D< Real >::isInInterval( Real x1, - Real x2, - Real x ) -{ - return ( ( x1 <= x ) and ( x <= x2 ) ); -} - -#endif // _TNLCIRCLE2D_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlInternalNode.h b/src/TNL/legacy/vdb/tnlInternalNode.h deleted file mode 100644 index 4281c303a016f769d1556b12383fd628fcf726a4..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlInternalNode.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _TNLINTERNALNODE_H_INCLUDED_ -#define _TNLINTERNALNODE_H_INCLUDED_ - -#include "tnlNode.h" - -template< typename Real, - typename Index, - Index LogX, - Index LogY = LogX > -class tnlInternalNode : public tnlNode< Real, Index, LogX, LogY > -{ -public: - tnlInternalNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ); - - void setNode( Index splitX, - Index splitY, - Index depth ); - - void setChildren( Index splitX, - Index splitY, - Index depth ); - - void write( fstream& f, - Index level ); - - ~tnlInternalNode(); - -private: - tnlBitmaskArray< LogX * LogY >* bitmaskArray; - tnlNode< Real, Index, LogX, LogY >* children[ LogX * LogY ]; -}; - - -#include "tnlInternalNode_impl.h" -#endif // _TNLINTERNALNODE_H_INCLUDED_ diff --git 
a/src/TNL/legacy/vdb/tnlInternalNode_impl.h b/src/TNL/legacy/vdb/tnlInternalNode_impl.h deleted file mode 100644 index 63bdd40e41a4f29e7af689a95b49bda22ed90176..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlInternalNode_impl.h +++ /dev/null @@ -1,112 +0,0 @@ -#ifndef _TNLINTERNALNODE_IMPL_H_INCLUDED_ -#define _TNLINTERNALNODE_IMPL_H_INCLUDED_ - -#include <iostream> -#include <iomanip> -#include "tnlInternalNode.h" -#include "tnlLeafNode.h" - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlInternalNode< Real, Index, LogX, LogY >::tnlInternalNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ) -: tnlNode< Real, Index, LogX, LogY >::tnlNode( area, circle, X, Y, level ) -{ - this->bitmaskArray = new tnlBitmaskArray< LogX * LogY >(); -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlInternalNode< Real, Index, LogX, LogY >::setNode( Index splitX, - Index splitY, - Index depth ) -{ - tnlNode< Real, Index, LogX, LogY >::setNode( splitX, splitY, this->bitmaskArray ); - this->setChildren( splitX, splitY, depth ); -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlInternalNode< Real, Index, LogX, LogY >::setChildren( Index splitX, - Index splitY, - Index depth ) -{ - for( Index i = 0; i < LogY; i++ ) - for( Index j = 0; j < LogX; j++ ) - { - Index index = i * LogY + j; - if( !this->bitmaskArray->getIthBitmask( index )->getState() ) - this->children[ index ] = NULL; - else if( this->level < depth - 1 ) - { - //std::cout << "creating new node, level = " << this->level << std::endl; - Index X = this->X * LogX + j; - Index Y = this->Y * LogY + i; - this->children[ index ] = new tnlInternalNode< Real, Index, LogX, LogY >( this->area, - this->circle, - X, - Y, - this->level + 1 ); - this->children[ index ]->setNode( splitX, splitY, depth ); - } - else - { - Index X = this->X * LogX + j; - Index Y = this->Y * LogY 
+ i; - this->children[ index ] = new tnlLeafNode< Real, Index, LogX, LogY >( this->area, - this->circle, - X, - Y, - this->level + 1 ); - this->children[ index ]->setNode( splitX, splitY, depth ); - } - } -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlInternalNode< Real, Index, LogX, LogY >::write( fstream& file, - Index level ) -{ - for( Index i = 0; i < LogX * LogY; i++ ) - { - if( this->level == level ) - { - Index x = this->bitmaskArray->getIthBitmask( i )->getX(); - Index y = this->bitmaskArray->getIthBitmask( i )->getY(); - bool state = this->bitmaskArray->getIthBitmask( i )->getState(); - file << "x=" << setw( 10 ) << x - << ", y=" << setw( 10 ) << y - << ", state=" << setw( 1 ) << state - << std::endl; - } - else if( this->children[ i ] ) - this->children[ i ]->write( file, level ); - } -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlInternalNode< Real, Index, LogX, LogY >::~tnlInternalNode() -{ - delete this->bitmaskArray; - for( Index i = 0; i < LogX * LogY; i++ ) - delete this->children[ i ]; - delete [] this->children; -} - - -#endif // _TNLINTERNALNODE_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlLeafNode.h b/src/TNL/legacy/vdb/tnlLeafNode.h deleted file mode 100644 index 6d24efe83be24fc3ec96afcb72981f9ae0f4844f..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlLeafNode.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _TNLLEAFNODE_H_INCLUDED_ -#define _TNLLEAFNODE_H_INCLUDED_ - -#include <fstream> -#include "tnlNode.h" -#include "tnlArea2D.h" -#include "tnlCircle2D.h" -#include "tnlBitmask.h" - -template< typename Real, - typename Index, - Index LogX, - Index LogY = LogX > -class tnlLeafNode : public tnlNode< Real, Index, LogX, LogY > -{ -public: - tnlLeafNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ); - - void setNode( Index splitX, - Index splitY, - Index depth ); - - void write( fstream& file, - Index 
level ); - - ~tnlLeafNode(); - -private: - tnlBitmaskArray< LogX * LogY >* bitmaskArray; -}; - -#include "tnlLeafNode_impl.h" -#endif // _TNLLEAFNODE_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlLeafNode_impl.h b/src/TNL/legacy/vdb/tnlLeafNode_impl.h deleted file mode 100644 index 1b228ee559f7a4f0d93afd6617c2b9e6ce048e8b..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlLeafNode_impl.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _TNLLEAFNODE_IMPL_H_INCLUDED_ -#define _TNLLEAFNODE_IMPL_H_INCLUDED_ - -#include "tnlLeafNode.h" -#include <iostream> -#include <iomanip> - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlLeafNode< Real, Index, LogX, LogY >::tnlLeafNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ) -: tnlNode< Real, Index, LogX, LogY >::tnlNode( area, circle, X, Y, level ) -{ - this->bitmaskArray = new tnlBitmaskArray< LogX * LogY >(); -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlLeafNode< Real, Index, LogX, LogY >::setNode( Index splitX, - Index splitY, - Index depth ) -{ - tnlNode< Real, Index, LogX, LogY >::setNode( splitX, splitY, this->bitmaskArray ); -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlLeafNode< Real, Index, LogX, LogY >::write( fstream& file, - Index level ) -{ - for( Index i = 0; i < LogX * LogY; i++ ) - { - Index x = this->bitmaskArray->getIthBitmask( i )->getX(); - Index y = this->bitmaskArray->getIthBitmask( i )->getY(); - bool state = this->bitmaskArray->getIthBitmask( i )->getState(); - file << "x=" << setw( 10 ) << x - << ", y=" << setw( 10 ) << y - << ", state=" << setw( 1 ) << state - << std::endl; - } -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlLeafNode< Real, Index, LogX, LogY >::~tnlLeafNode() -{ - delete this->bitmaskArray; -} - -#endif // _TNLLEAFNODE_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlNode.h 
b/src/TNL/legacy/vdb/tnlNode.h deleted file mode 100644 index 7910e4bbeec976924b7b6a008765a695ff2b8ac0..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlNode.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _TNLNODE_H_INCLUDED_ -#define _TNLNODE_H_INCLUDED_ - -#include "tnlBitmaskArray.h" -#include "tnlArea2D.h" -#include "tnlCircle2D.h" -#include <fstream> - - -template< typename Real, - typename Index, - Index LogX, - Index LogY = LogX > -class tnlNode -{ -public: - tnlNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ); - - void setNode( Index splitX, - Index splitY, - tnlBitmaskArray< LogX * LogY >* bitmaskArray ); - - virtual void setNode( Index splitX = 0, - Index splitY = 0, - Index depth = 0 ){}; - - virtual void write( fstream& f, - Index level ){}; - - Index getLevel(); - - ~tnlNode(); - -protected: - tnlArea2D< Real >* area; - tnlCircle2D< Real >* circle; - Index X, Y, level; -}; - -#include "tnlNode_impl.h" -#endif // _TNLNODE_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlNode_impl.h b/src/TNL/legacy/vdb/tnlNode_impl.h deleted file mode 100644 index fb6f80b54e1b80045f5532dd7fcc684b053cb061..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlNode_impl.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _TNLNODE_IMPL_H_INCLUDED_ -#define _TNLNODE_IMPL_H_INCLUDED_ - -#include "tnlNode.h" -#include "tnlVDBMath.h" - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlNode< Real, Index, LogX, LogY >::tnlNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - Index X, - Index Y, - Index level ) -{ - this->area = area; - this->circle = circle; - this->level = level; - this->X = X; - this->Y = Y; -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -Index tnlNode< Real, Index, LogX, LogY >::getLevel() -{ - return this->level; -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -void tnlNode< Real, Index, LogX, 
LogY >::setNode( Index splitX, - Index splitY, - tnlBitmaskArray< LogX * LogY >* bitmaskArray ) -{ - - Index depthX = splitX * tnlVDBMath< Index >::power( LogX, this->level - 1 ); - Index depthY = splitY * tnlVDBMath< Index >::power( LogY, this->level - 1 ); - Real stepX = ( Real ) this->area->getLengthX() / depthX; - Real stepY = ( Real ) this->area->getLengthY() / depthY; - Real startX = this->X * stepX; - Real endX = ( this->X + 1 ) * stepX; - Real startY = this->Y * stepY; - Real endY = ( this->Y + 1 ) * stepY; - Real dx = ( endX - startX ) / LogX; - Real dy = ( endY - startY ) / LogY; - for( Index i = 0; i < LogY; i++ ) - for( Index j = 0; j < LogX; j++ ) - { - Real x1 = startX + j * dx; - Real x2 = startX + ( j + 1 ) * dx; - Real y1 = startY + i * dy; - Real y2 = startY + ( i + 1 ) * dy; - bool state = this->circle->isIntercept( x1, x2, y1, y2 ); - Index posX = this->X * LogX + j; - Index posY = this->Y * LogY + i; - tnlBitmask* bitmask = new tnlBitmask( state, posX, posY ); - bitmaskArray->setIthBitmask( i * LogX + j, bitmask ); - } -} - -template< typename Real, - typename Index, - Index LogX, - Index LogY > -tnlNode< Real, Index, LogX, LogY >::~tnlNode() -{ - this->area = NULL; - this->circle = NULL; -} - -#endif // _TNLNODE_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlRootNode.h b/src/TNL/legacy/vdb/tnlRootNode.h deleted file mode 100644 index b944549c5d4b32e647438388d8590bd7d8b1ddc9..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlRootNode.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef _TNLROOTNODE_H_INCLUDED_ -#define _TNLROOTNODE_H_INCLUDED_ - -#include "tnlNode.h" - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY = LogX > -class tnlRootNode : public tnlNode< Real, Index, LogX, LogY > -{ -public: - tnlRootNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - unsigned nodesX, - unsigned nodesY, - unsigned depth ); - - void setNode(); - - void createTree(); - - void write(); - - 
~tnlRootNode(); - -private: - unsigned nodesX; - unsigned nodesY; - tnlBitmaskArray< Size >* bitmaskArray; - tnlNode< Real, Index, LogX, LogY >* children[ Size ]; - unsigned depth; -}; - -#include "tnlRootNode_impl.h" -#endif // _TNLROOTNODE_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlRootNode_impl.h b/src/TNL/legacy/vdb/tnlRootNode_impl.h deleted file mode 100644 index 8bd0ecb01553b005c6e21a744979be78f814be55..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlRootNode_impl.h +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef _TNLROOTNODE_IMPL_H_INCLUDED_ -#define _TNLROOTNODE_IMPL_H_INCLUDED_ - -#include <iostream> -#include <iomanip> -#include <string> -#include "tnlNode.h" -#include "tnlRootNode.h" -#include "tnlInternalNode.h" -#include "tnlLeafNode.h" - - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY > -tnlRootNode< Real, Index, Size, LogX, LogY >::tnlRootNode( tnlArea2D< Real >* area, - tnlCircle2D< Real >* circle, - unsigned nodesX, - unsigned nodesY, - unsigned depth ) -: tnlNode< Real, Index, LogX, LogY >::tnlNode( area, circle, 0, 0, 0 ) -{ - this->nodesX = nodesX; - this->nodesY = nodesY; - this->bitmaskArray = new tnlBitmaskArray< Size >(); - this->depth = depth; -} - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY > -void tnlRootNode< Real, Index, Size, LogX, LogY >::setNode() -{ - Real stepX = ( this->area->getEndX() - this->area->getStartX() ) / this->nodesX; - Real stepY = ( this->area->getEndY() - this->area->getStartY() ) / this->nodesY; - Real startX = this->area->getStartX(); - Real startY = this->area->getStartY(); - for( Index i = 0; i < this->nodesX; i++ ) - for( Index j = 0; j < this->nodesY; j++ ) - { - Real x1 = startX + j * stepX; - Real x2 = startX + ( j + 1 ) * stepX; - Real y1 = startY + i * stepY; - Real y2 = startY + ( i + 1 ) * stepY; - bool state = this->circle->isIntercept( x1, x2, y1, y2 ); - Index X = j; - Index Y = i; - tnlBitmask* 
bitmask = new tnlBitmask( state, X, Y ); - this->bitmaskArray->setIthBitmask( i * this->nodesX + j, bitmask); - } -} - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY > -void tnlRootNode< Real, Index, Size, LogX, LogY >::createTree() -{ - this->setNode(); // first we need to create root node - for( Index i = 0; i < this->nodesY; i++ ) - for( Index j = 0; j < this-> nodesX; j++ ) - { - Index index = i * this->nodesY + j; - if( !this->bitmaskArray->getIthBitmask( index )->getState() ) - this->children[ index ] = NULL; - else if( this->level < this->depth - 1 ) - { - Index X = j; - Index Y = i; - this->children[ index ] = new tnlInternalNode< Real, Index, LogX, LogY >( this->area, - this->circle, - X, - Y, - this->level + 1 ); - this->children[ index ]->setNode( nodesX, nodesY, this->depth ); - } - else - { - Index X = j; - Index Y = i; - this->children[ index ] = new tnlLeafNode< Real, Index, LogX, LogY >( this->area, - this->circle, - X, - Y, - this->level + 1 ); - this->children[ index ]->setNode( nodesX, nodesY, this->depth ); - } - } -} - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY > -void tnlRootNode< Real, Index, Size, LogX, LogY >::write() -{ - for( Index i = 0; i < this->depth; i++ ) - { - std::string filename = "nodesLevel_" + std::to_string( i ); - fstream f; - f.open( filename, ios::out | ios::trunc ); - Index startX = this->area->getStartX(); - Index endX = this->area->getEndX(); - Index startY = this->area->getStartY(); - Index endY = this->area->getEndY(); - f << "startx=" << setw( 10 ) << startX - << ", endx=" << setw( 10 ) << endX - << ", starty=" <<setw( 10 ) << startY - << ", endy=" << setw( 10 ) << endY - << ", level=" << setw( 10 ) << i - << std::endl; - f << "rootSplitX=" << setw( 10 ) << this->nodesX - << ", rootSplitY=" << setw( 10 ) << this->nodesY - << ", LogX=" << setw( 10 ) << LogX - << ", LogY=" << setw( 10 ) << LogY - << std::endl << std::endl; - for( 
Index j = 0; j < Size; j++ ) - { - if( this->level == i ) - { - Index x = this->bitmaskArray->getIthBitmask( j )->getX(); - Index y = this->bitmaskArray->getIthBitmask( j )->getY(); - bool state = this->bitmaskArray->getIthBitmask( j )->getState(); - f << "x=" << setw( 10 ) << x - << ", y=" << setw( 10 ) << y - << ", state=" << setw( 1 ) << state - << std::endl; - } - else if( this->children[ j ] ) - this->children[ j ]->write( f, i ); - } - } -} - -template< typename Real, - typename Index, - unsigned Size, - Index LogX, - Index LogY > -tnlRootNode< Real, Index, Size, LogX, LogY >::~tnlRootNode() -{ - delete this->bitmaskArray; - for( Index i = 0; i < Size; i++ ) - delete this->children[ i ]; - delete [] this->children; -} - -#endif // _TNLROOTNODE_IMPL_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/tnlRootNode_test.cpp b/src/TNL/legacy/vdb/tnlRootNode_test.cpp deleted file mode 100644 index 09ac03c49f6d32c63e7f31e62fed4400800fcb40..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlRootNode_test.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include <iostream> -#include <fstream> -#include <cstdlib> -#include <ctime> -#include "tnlRootNode.h" -#include "tnlArea2D.h" -#include "tnlCircle2D.h" - -int main( int argc, char** argv ) -{ - clock_t begin = clock(); - int areaStart = atoi( argv[ 1 ] ); - int areaEnd = atoi( argv[ 2 ] ); - int circleX = atoi( argv[ 3 ] ); - int circleY = atoi( argv[ 4 ] ); - int radius = atoi( argv[ 5 ] ); - const unsigned x = 4; - const unsigned y = 4; - const unsigned size = x * y; - tnlArea2D< double >* area = new tnlArea2D< double >( areaStart, areaEnd, areaStart, areaEnd ); - tnlCircle2D< double >* circle = new tnlCircle2D< double >( circleX, circleY, radius ); - tnlRootNode< double, int, size, x, y >* root = new tnlRootNode< double, int, size, x, y >( area, circle, x, y, 6 ); - root->createTree(); - clock_t end1 = clock(); - root->write(); - clock_t end2 = clock(); - std::cout << "Tree created in " << ( ( double ) (end1 - begin) 
) / CLOCKS_PER_SEC << "s" << std::endl; - std::cout << "Tree traversed in " << ( ( double )(end2 - begin) ) / CLOCKS_PER_SEC << "s" << std::endl; - return 0; -} diff --git a/src/TNL/legacy/vdb/tnlVDBMath.h b/src/TNL/legacy/vdb/tnlVDBMath.h deleted file mode 100644 index 4917a19f263e14145948dc61e30bcdecce965532..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/tnlVDBMath.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _TNLVDBMATH_H_INCLUDED_ -#define _TNLVDBMATH_H_INCLUDED_ - -template< typename Index > -class tnlVDBMath -{ -public: - static Index power( Index number, - Index exponent ) - { - Index result = 1; - for( Index i = 0; i < exponent; i++ ) - result *= number; - return result; - } - -}; - -#endif // _TNLVDBMATH_H_INCLUDED_ diff --git a/src/TNL/legacy/vdb/unittests_vdb/tnlBitmaskArray_test.cpp b/src/TNL/legacy/vdb/unittests_vdb/tnlBitmaskArray_test.cpp deleted file mode 100644 index 93de1856b54098539d5798c7fc8c46ae7a518a3a..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/unittests_vdb/tnlBitmaskArray_test.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include <iostream> -#include "tnlBitmaskArray.h" - -int main() -{ - tnlBitmaskArray< 5 >* arr = new tnlBitmaskArray< 5 >(); - return 0; -} diff --git a/src/TNL/legacy/vdb/unittests_vdb/tnlBitmask_unitTest.cpp b/src/TNL/legacy/vdb/unittests_vdb/tnlBitmask_unitTest.cpp deleted file mode 100644 index da0d6df0822286a7e50afe1dd6fb60dd312b5b30..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/unittests_vdb/tnlBitmask_unitTest.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include <iostream> -#include <cstdint> -#include "tnlBitmask.h" - -using namespace std; - -int main() -{ - for( int i = 0; i < 50000; i++ ) - { - bool state = i % 2; - unsigned x = rand() % ( 1 << 30 ); - unsigned y = rand() % ( 1 << 30 ); - tnlBitmask* mask = new tnlBitmask( state, x, y ); - if( state != mask->getState() || - x != mask->getX() || - y != mask->getY() ) - cout << "state = " << state << ", mask.getState() = " 
<< mask->getState() - << "x = " << x << ", mask.getX() = " << mask->getX() - << "y = " << y << ", mask.getY() = " << mask->getY() << endl; - } -} diff --git a/src/TNL/legacy/vdb/unittests_vdb/tnlCircle2D_unitTest.cpp b/src/TNL/legacy/vdb/unittests_vdb/tnlCircle2D_unitTest.cpp deleted file mode 100644 index 1c177eb8e35c512bc9faca11d22aad2e248ee669..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/unittests_vdb/tnlCircle2D_unitTest.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include <iostream> -#include "tnlCircle2D.h" - -using namespace std; - -int main() -{ // dost spatnej unittest -- vylepsit - tnlCircle2D* circle = new tnlCircle2D( 5, 5, 4 ); - cout << "Testing whole circle inside area: "; - if( circle->isIntercept( 0, 10, 0, 10, true ) ) - cout << "Ok" << endl; - else - cout << "Test failed." << endl; - - cout << "Testing whole area inside circle: "; - if( !circle->isIntercept( 4, 6, 4, 6, true ) ) - cout << "Ok" << endl; - else - cout << "Test failed." << endl; - - cout << "Testing left boundry intercept: "; - if( circle->isIntercept( 3, 7, 0, 2, true ) ) - cout << "Ok" << endl; - else - cout << "Test failed." 
<< endl; - return 0; -} diff --git a/src/TNL/legacy/vdb/unittests_vdb/tnlRootNode_test.cpp b/src/TNL/legacy/vdb/unittests_vdb/tnlRootNode_test.cpp deleted file mode 100644 index de8eb97fed32be675d3b9148d161119b79eb33cb..0000000000000000000000000000000000000000 --- a/src/TNL/legacy/vdb/unittests_vdb/tnlRootNode_test.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include <iostream> -#include "tnlRootNode.h" -#include "tnlArea2D.h" -#include "tnlCircle2D.h" - -int main() -{ - const unsigned x = 4; - const unsigned y = 4; - const unsigned size = x * y; - tnlArea2D* area = new tnlArea2D( 0, 20, 0, 20 ); - tnlCircle2D* circle = new tnlCircle2D( 10, 10, 4 ); - tnlRootNode< size >* root = new tnlRootNode< size >( area, circle, x, y ); - root->setNode(); - root->printStates(); - return 0; -} diff --git a/src/UnitTests/Containers/ArrayOperationsTest.h b/src/UnitTests/Containers/ArrayOperationsTest.h index 109e947649bec236bcb7b7e64bcc84eacd44aef7..aff044601cfcc326fa43134d6da5d903cdabf5f5 100644 --- a/src/UnitTests/Containers/ArrayOperationsTest.h +++ b/src/UnitTests/Containers/ArrayOperationsTest.h @@ -209,11 +209,11 @@ TYPED_TEST( ArrayOperationsTest, allocateMemory_cuda ) ValueType* data; ArrayOperations< Devices::Cuda >::allocateMemory( data, size ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); ASSERT_NE( data, nullptr ); ArrayOperations< Devices::Cuda >::freeMemory( data ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda ) @@ -223,7 +223,7 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda ) ValueType* data; ArrayOperations< Devices::Cuda >::allocateMemory( data, size ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < size; i++ ) ArrayOperations< Devices::Cuda >::setMemoryElement( &data[ i ], (ValueType) i ); @@ -237,7 +237,7 @@ TYPED_TEST( ArrayOperationsTest, setMemoryElement_cuda ) } 
ArrayOperations< Devices::Cuda >::freeMemory( data ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); } TYPED_TEST( ArrayOperationsTest, setMemory_cuda ) @@ -250,9 +250,9 @@ TYPED_TEST( ArrayOperationsTest, setMemory_cuda ) ArrayOperations< Devices::Cuda >::allocateMemory( deviceData, size ); ArrayOperations< Devices::Host >::setMemory( hostData, (ValueType) 0, size ); ArrayOperations< Devices::Cuda >::setMemory( deviceData, (ValueType) 13, size ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory< ValueType, ValueType >( hostData, deviceData, size ); - ASSERT_TRUE( TNL_CHECK_CUDA_DEVICE ); + ASSERT_NO_THROW( TNL_CHECK_CUDA_DEVICE ); for( int i = 0; i < size; i++ ) EXPECT_EQ( hostData[ i ], 13 ); ArrayOperations< Devices::Host >::freeMemory( hostData ); diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index e451c0416f5f14e63af1e2a4a3bb117159f16229..f92954df3be4b2bb850859cad143f1ec9d0daae2 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -62,19 +62,16 @@ protected: // types for which ArrayTest is instantiated using ArrayTypes = ::testing::Types< - Array< short, Devices::Host, short >, Array< int, Devices::Host, short >, Array< long, Devices::Host, short >, Array< float, Devices::Host, short >, Array< double, Devices::Host, short >, Array< MyData, Devices::Host, short >, - Array< short, Devices::Host, int >, Array< int, Devices::Host, int >, Array< long, Devices::Host, int >, Array< float, Devices::Host, int >, Array< double, Devices::Host, int >, Array< MyData, Devices::Host, int >, - Array< short, Devices::Host, long >, Array< int, Devices::Host, long >, Array< long, Devices::Host, long >, Array< float, Devices::Host, long >, @@ -83,19 +80,16 @@ using ArrayTypes = ::testing::Types< // FIXME: this segfaults in String::~String() // Array< String, 
Devices::Host, long >, #ifdef HAVE_CUDA - Array< short, Devices::Cuda, short >, Array< int, Devices::Cuda, short >, Array< long, Devices::Cuda, short >, Array< float, Devices::Cuda, short >, Array< double, Devices::Cuda, short >, Array< MyData, Devices::Cuda, short >, - Array< short, Devices::Cuda, int >, Array< int, Devices::Cuda, int >, Array< long, Devices::Cuda, int >, Array< float, Devices::Cuda, int >, Array< double, Devices::Cuda, int >, Array< MyData, Devices::Cuda, int >, - Array< short, Devices::Cuda, long >, Array< int, Devices::Cuda, long >, Array< long, Devices::Cuda, long >, Array< float, Devices::Cuda, long >, @@ -103,21 +97,18 @@ using ArrayTypes = ::testing::Types< Array< MyData, Devices::Cuda, long >, #endif #ifdef HAVE_MIC - Array< short, Devices::MIC, short >, Array< int, Devices::MIC, short >, Array< long, Devices::MIC, short >, Array< float, Devices::MIC, short >, Array< double, Devices::MIC, short >, // TODO: MyData does not work on MIC // Array< MyData, Devices::MIC, short >, - Array< short, Devices::MIC, int >, Array< int, Devices::MIC, int >, Array< long, Devices::MIC, int >, Array< float, Devices::MIC, int >, Array< double, Devices::MIC, int >, // TODO: MyData does not work on MIC // Array< MyData, Devices::MIC, int >, - Array< short, Devices::MIC, long >, Array< int, Devices::MIC, long >, Array< long, Devices::MIC, long >, Array< float, Devices::MIC, long >, diff --git a/src/UnitTests/Containers/ArrayViewTest.h b/src/UnitTests/Containers/ArrayViewTest.h index 7d37a3b78be2c5c006fc48ad9a943a6bd27ca39b..2fa6fb3afd551dabf3b26386004096ce5028226a 100644 --- a/src/UnitTests/Containers/ArrayViewTest.h +++ b/src/UnitTests/Containers/ArrayViewTest.h @@ -14,7 +14,9 @@ #include <type_traits> #include <TNL/Containers/Array.h> +#include <TNL/Containers/Vector.h> #include <TNL/Containers/ArrayView.h> +#include <TNL/Containers/VectorView.h> #include "gtest/gtest.h" @@ -53,91 +55,95 @@ std::ostream& operator<<( std::ostream& str, const MyData& v ) // 
test fixture for typed tests -template< typename Array > +template< typename View > class ArrayViewTest : public ::testing::Test { protected: - using ArrayType = Array; - using ViewType = ArrayView< typename Array::ValueType, typename Array::DeviceType, typename Array::IndexType >; + using ViewType = View; + using ArrayType = Array< typename View::ValueType, typename View::DeviceType, typename View::IndexType >; }; // types for which ArrayViewTest is instantiated -using ArrayTypes = ::testing::Types< - Array< short, Devices::Host, short >, - Array< int, Devices::Host, short >, - Array< long, Devices::Host, short >, - Array< float, Devices::Host, short >, - Array< double, Devices::Host, short >, - Array< MyData, Devices::Host, short >, - Array< short, Devices::Host, int >, - Array< int, Devices::Host, int >, - Array< long, Devices::Host, int >, - Array< float, Devices::Host, int >, - Array< double, Devices::Host, int >, - Array< MyData, Devices::Host, int >, - Array< short, Devices::Host, long >, - Array< int, Devices::Host, long >, - Array< long, Devices::Host, long >, - Array< float, Devices::Host, long >, - Array< double, Devices::Host, long >, - Array< MyData, Devices::Host, long > +using ViewTypes = ::testing::Types< + ArrayView< int, Devices::Host, short >, + ArrayView< long, Devices::Host, short >, + ArrayView< float, Devices::Host, short >, + ArrayView< double, Devices::Host, short >, + ArrayView< MyData, Devices::Host, short >, + ArrayView< int, Devices::Host, int >, + ArrayView< long, Devices::Host, int >, + ArrayView< float, Devices::Host, int >, + ArrayView< double, Devices::Host, int >, + ArrayView< MyData, Devices::Host, int >, + ArrayView< int, Devices::Host, long >, + ArrayView< long, Devices::Host, long >, + ArrayView< float, Devices::Host, long >, + ArrayView< double, Devices::Host, long >, + ArrayView< MyData, Devices::Host, long >, // FIXME: this segfaults in String::~String() -// , Array< String, Devices::Host, long > +// , ArrayView< String, 
Devices::Host, long >, #ifdef HAVE_CUDA - , - Array< short, Devices::Cuda, short >, - Array< int, Devices::Cuda, short >, - Array< long, Devices::Cuda, short >, - Array< float, Devices::Cuda, short >, - Array< double, Devices::Cuda, short >, - Array< MyData, Devices::Cuda, short >, - Array< short, Devices::Cuda, int >, - Array< int, Devices::Cuda, int >, - Array< long, Devices::Cuda, int >, - Array< float, Devices::Cuda, int >, - Array< double, Devices::Cuda, int >, - Array< MyData, Devices::Cuda, int >, - Array< short, Devices::Cuda, long >, - Array< int, Devices::Cuda, long >, - Array< long, Devices::Cuda, long >, - Array< float, Devices::Cuda, long >, - Array< double, Devices::Cuda, long >, - Array< MyData, Devices::Cuda, long > + ArrayView< int, Devices::Cuda, short >, + ArrayView< long, Devices::Cuda, short >, + ArrayView< float, Devices::Cuda, short >, + ArrayView< double, Devices::Cuda, short >, + ArrayView< MyData, Devices::Cuda, short >, + ArrayView< int, Devices::Cuda, int >, + ArrayView< long, Devices::Cuda, int >, + ArrayView< float, Devices::Cuda, int >, + ArrayView< double, Devices::Cuda, int >, + ArrayView< MyData, Devices::Cuda, int >, + ArrayView< int, Devices::Cuda, long >, + ArrayView< long, Devices::Cuda, long >, + ArrayView< float, Devices::Cuda, long >, + ArrayView< double, Devices::Cuda, long >, + ArrayView< MyData, Devices::Cuda, long >, #endif #ifdef HAVE_MIC - , - Array< short, Devices::MIC, short >, - Array< int, Devices::MIC, short >, - Array< long, Devices::MIC, short >, - Array< float, Devices::MIC, short >, - Array< double, Devices::MIC, short >, + ArrayView< int, Devices::MIC, short >, + ArrayView< long, Devices::MIC, short >, + ArrayView< float, Devices::MIC, short >, + ArrayView< double, Devices::MIC, short >, // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, short >, - Array< short, Devices::MIC, int >, - Array< int, Devices::MIC, int >, - Array< long, Devices::MIC, int >, - Array< float, Devices::MIC, int >, - 
Array< double, Devices::MIC, int >, +// ArrayView< MyData, Devices::MIC, short >, + ArrayView< int, Devices::MIC, int >, + ArrayView< long, Devices::MIC, int >, + ArrayView< float, Devices::MIC, int >, + ArrayView< double, Devices::MIC, int >, // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, int >, - Array< short, Devices::MIC, long >, - Array< int, Devices::MIC, long >, - Array< long, Devices::MIC, long >, - Array< float, Devices::MIC, long >, - Array< double, Devices::MIC, long > +// ArrayView< MyData, Devices::MIC, int >, + ArrayView< int, Devices::MIC, long >, + ArrayView< long, Devices::MIC, long >, + ArrayView< float, Devices::MIC, long >, + ArrayView< double, Devices::MIC, long >, // TODO: MyData does not work on MIC -// Array< MyData, Devices::MIC, long > +// ArrayView< MyData, Devices::MIC, long >, +#endif + + // all ArrayView tests should also work with VectorView + // (but we can't test all types because the argument list would be too long...) + VectorView< float, Devices::Host, long >, + VectorView< double, Devices::Host, long > +#ifdef HAVE_CUDA + , + VectorView< float, Devices::Cuda, long >, + VectorView< double, Devices::Cuda, long > +#endif +#ifdef HAVE_MIC + , + VectorView< float, Devices::MIC, long >, + VectorView< double, Devices::MIC, long > #endif >; -TYPED_TEST_CASE( ArrayViewTest, ArrayTypes ); +TYPED_TEST_CASE( ArrayViewTest, ViewTypes ); TYPED_TEST( ArrayViewTest, constructors ) { using ArrayType = typename TestFixture::ArrayType; using ViewType = typename TestFixture::ViewType; - using ConstViewType = ArrayView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; + using ConstViewType = VectorView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; ArrayType a( 10 ); EXPECT_EQ( a.getSize(), 10 ); @@ -410,6 +416,7 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) { using ArrayType = typename TestFixture::ArrayType; 
using ViewType = typename TestFixture::ViewType; + using ConstViewType = VectorView< const typename ArrayType::ValueType, typename ArrayType::DeviceType, typename ArrayType::IndexType >; ArrayType a( 10 ), b( 10 ); typename ArrayType::HostType a_host( 10 ); @@ -437,6 +444,11 @@ TYPED_TEST( ArrayViewTest, assignmentOperator ) u_host = u; EXPECT_EQ( u_host, u ); EXPECT_EQ( u_host.getData(), a_host.getData() ); + + // assignment of const view to non-const view + v.setValue( 0 ); + ConstViewType c( u ); + v = c; } // test works only for arithmetic types diff --git a/src/UnitTests/Containers/VectorTest.h b/src/UnitTests/Containers/VectorTest.h index d2cf8217f80fbe4503eed73f62ab81bd3970fe6a..47d3908921dcdcdda5689df8ba8e4e424b84c2f2 100644 --- a/src/UnitTests/Containers/VectorTest.h +++ b/src/UnitTests/Containers/VectorTest.h @@ -755,6 +755,23 @@ TEST( VectorSpecialCasesTest, initializationOfVectorViewByArrayView ) EXPECT_EQ( v_view.sum(), 0 ); } +TEST( VectorSpecialCasesTest, defaultConstructors ) +{ + using ArrayType = Containers::Array< int, Devices::Host >; + using VectorViewType = VectorView< int, Devices::Host >; + using ArrayViewType = ArrayView< int, Devices::Host >; + + ArrayType a( 100 ); + a.setValue( 0 ); + + ArrayViewType a_view; + a_view.bind( a ); + + VectorViewType v_view; + v_view.bind( a ); + EXPECT_EQ( v_view.getData(), a_view.getData() ); +} + #endif // HAVE_GTEST diff --git a/src/UnitTests/Functions/Functions.h b/src/UnitTests/Functions/Functions.h index 59a0335c15af53b880f716318e571688f593d91c..88ba18bc8784cf72eb59301185b6c6c9a7acd321 100644 --- a/src/UnitTests/Functions/Functions.h +++ b/src/UnitTests/Functions/Functions.h @@ -1,5 +1,5 @@ /*************************************************************************** - DistributedGridTest.cpp - description + Functions.h - description ------------------- begin : Sep 6, 2017 copyright : (C) 2017 by Tomas Oberhuber et al. 
diff --git a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp index 4907f1d269cfc3d7f5d205405546675f1b8d58fe..000a832b6011cd7b444ed27d141a8debbfec7c38 100644 --- a/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp +++ b/src/UnitTests/Meshes/DistributedMeshes/CutDistributedMeshFunctionTest.cpp @@ -578,7 +578,7 @@ TEST(CutDistributedMeshFunction, 3D_2_Save) StaticVector<1,typename CutMeshType::IndexType>(4) ); - String FileName=String("/tmp/test-file.tnl"); + String FileName=String("test-file.tnl"); if(inCut) { MeshFunction<CutMeshType> cutMeshFunction; @@ -612,7 +612,8 @@ TEST(CutDistributedMeshFunction, 3D_2_Save) loadMeshFunctionptr.bind(globalCutGrid,loaddof); File file; - file.open( FileName, IOMode::read ); + bool ok=file.open( FileName, IOMode::read ); + TNL_ASSERT_TRUE(ok,"Cannot open file"); loadMeshFunctionptr.boundLoad(file); file.close(); @@ -630,9 +631,12 @@ TEST(CutDistributedMeshFunction, 3D_2_Save) fromEntity.refresh(); outEntity.refresh(); - EXPECT_EQ(loadMeshFunctionptr.getValue(outEntity), (*linearFunctionPtr)(fromEntity)) << "Error in Left overlap"; + EXPECT_EQ(loadMeshFunctionptr.getValue(outEntity), (*linearFunctionPtr)(fromEntity)) << "Error at "<< i <<" "<< j; } + + EXPECT_EQ( std::remove( FileName.getString()) , 0 ); + EXPECT_EQ( std::remove( (FileName+String("-mesh.tnl")).getString()) , 0 ); } } diff --git a/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp b/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp index e8625bc3d6ab57d3ae01b23d99e68dc34792d7a2..0d2bc77ede3577153944398c8a61c6370d1bf876 100644 --- a/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp +++ b/src/UnitTests/Meshes/DistributedMeshes/DirectionsTest.cpp @@ -3,7 +3,6 @@ #include <TNL/Meshes/DistributedMeshes/Directions.h> #include <TNL/Containers/StaticVector.h> -#include <TNL/StaticVectorFor.h> using namespace 
TNL::Meshes::DistributedMeshes; using namespace TNL::Containers; diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h index 9ac299621b15b85ff2bd31126af19c647a9158e2..5a0a43bcfb31c48da4a1069bde5f1f3847eba3f7 100644 --- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h +++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIOTest.h @@ -244,8 +244,8 @@ class TestDistributedGridIO linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr); - String FileName=String("/tmp/test-file.tnl"); - DistributedGridIO<MeshFunctionType> ::save(FileName, *meshFunctionptr ); + String fileName=String("test-file-distributedgrid-io-save.tnl"); + DistributedGridIO<MeshFunctionType> ::save(fileName, *meshFunctionptr ); //create similar local mesh function and evaluate linear function on it @@ -273,8 +273,10 @@ class TestDistributedGridIO loadDof.setValue(-1); + String localFileName= fileName+String("-")+distributedGrid.printProcessCoords()+String(".tnl"); + File file; - file.open( FileName+String("-")+distributedGrid.printProcessCoords(), IOMode::read ); + file.open(localFileName, IOMode::read ); loadMeshFunctionptr->boundLoad(file); file.close(); @@ -282,6 +284,11 @@ { EXPECT_EQ( localDof.getElement(i), loadDof.getElement(i)) << "Compare Loaded and evaluated Dof Failed for: "<< i; } + + EXPECT_EQ( std::remove( localFileName.getString()) , 0 ); + + //remove meshfile + EXPECT_EQ( std::remove( (fileName+String("-mesh-")+distributedGrid.printProcessCoords()+String(".tnl")).getString()) , 0 ); } static void TestLoad() @@ -326,9 +333,10 @@ class TestDistributedGridIO linearFunctionEvaluator.evaluateAllEntities(localMeshFunctionptr , linearFunctionPtr); - String FileName=String("/tmp/test-file.tnl"); + String fileName=String("test-file-distributedgrid-io-load.tnl"); + String
localFileName=fileName+String("-")+distributedGrid.printProcessCoords()+String(".tnl"); File file; - file.open( FileName+String("-")+distributedGrid.printProcessCoords(), IOMode::write ); + file.open( localFileName, IOMode::write ); localMeshFunctionptr->save(file); file.close(); @@ -343,7 +351,7 @@ class TestDistributedGridIO loadDof.setValue(0); loadMeshFunctionptr->bind(loadGridptr,loadDof); - DistributedGridIO<MeshFunctionType> ::load(FileName, *loadMeshFunctionptr ); + DistributedGridIO<MeshFunctionType> ::load(fileName, *loadMeshFunctionptr ); loadMeshFunctionptr->template synchronize<CommunicatorType>(); //need synchronization for overlaps to be filled corectly in loadDof @@ -363,7 +371,9 @@ class TestDistributedGridIO for(int i=0;i<dof.getSize();i++) { EXPECT_EQ( dof.getElement(i), loadDof.getElement(i)) << "Compare Loaded and evaluated Dof Failed for: "<< i; - } + } + + EXPECT_EQ( std::remove( localFileName.getString()) , 0 ); } }; diff --git a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h index a68fe628e69bcf434dabe68953180dfa6a4a6b9c..ef0160741641be58b36291cb0cf42b9594c9cc2d 100644 --- a/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h +++ b/src/UnitTests/Meshes/DistributedMeshes/DistributedGridIO_MPIIOTest.h @@ -80,7 +80,7 @@ class TestDistributedGridMPIIO{ linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr); - String FileName=String("/tmp/test-file.tnl"); + String FileName=String("test-file-mpiio-save.tnl"); DistributedGridIO<MeshFunctionType,MpiIO> ::save(FileName, *meshFunctionptr ); //first process compare results @@ -108,6 +108,7 @@ class TestDistributedGridMPIIO{ { EXPECT_EQ( globalEvaluatedDof.getElement(i), loadDof.getElement(i)) << "Compare Loaded and evaluated Dof Failed for: "<< i; } + EXPECT_EQ( std::remove( FileName.getString()) , 0 ); } } @@ -135,7 +136,7 @@ class TestDistributedGridMPIIO{ 
SubdomainOverlapsGetter< MeshType, CommunicatorType >::getOverlaps( &distributedGrid, lowerOverlap, upperOverlap, 1 ); distributedGrid.setOverlaps( lowerOverlap, upperOverlap ); - String FileName=String("/tmp/test-file.tnl"); + String FileName=String("/tmp/test-file-mpiio-load.tnl"); //Prepare file if(CommunicatorType::GetRank(CommunicatorType::AllGroup)==0) @@ -178,6 +179,11 @@ class TestDistributedGridMPIIO{ { EXPECT_EQ( evalDof.getElement(i), loadDof.getElement(i)) << "Compare Loaded and evaluated Dof Failed for: "<< i; } + + if(CommunicatorType::GetRank(CommunicatorType::AllGroup)==0) + { + EXPECT_EQ( std::remove( FileName.getString()) , 0 ); + } } };