Loading CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode Loading src/Benchmarks/BLAS/array-operations.h +3 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ benchmarkArrayOperations( Benchmark & benchmark, deviceArray2.setSize( size ); #endif Real resultHost, resultDevice; Real resultHost; // reset functions Loading Loading @@ -104,6 +104,7 @@ benchmarkArrayOperations( Benchmark & benchmark, benchmark.setOperation( "comparison (operator==)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", compareHost ); #ifdef HAVE_CUDA Real resultDevice; auto compareCuda = [&]() { resultDevice = (int) ( deviceArray == deviceArray2 ); }; Loading @@ -118,6 +119,7 @@ benchmarkArrayOperations( Benchmark & benchmark, // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will // complain when compiling without CUDA const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); (void)copyBasetime; // ignore unused variable #ifdef HAVE_CUDA auto copyAssignCudaCuda = [&]() { deviceArray = deviceArray2; Loading src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +1 −1 Original line number Diff line number Diff line Loading @@ -241,7 +241,7 @@ struct SpmvBenchmark const auto gi = distributedMatrix.getLocalRowRange().getGlobalIndex( i ); distributedRowLengths[ gi ] = matrix.getRowCapacity( gi ); } distributedMatrix.setCompressedRowLengths( distributedRowLengths ); distributedMatrix.setRowCapacities( distributedRowLengths ); // copy data from the global matrix/vector into the distributed matrix/vector for( IndexType i = 0; i < distributedMatrix.getLocalMatrix().getRows(); i++ ) { Loading src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -646,7 +646,6 @@ HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Differenti applyBoundaryConditions( const RealType& time, DofVectorPointer& uDofs ) { const MeshPointer& mesh = this->getMesh(); if( this->cudaKernelType == "templated" ) { this->bindDofs( uDofs ); Loading Loading @@ -698,6 +697,7 @@ applyBoundaryConditions( const RealType& time, #endif userData.boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >(); Meshes::Traverser< MeshType, Cell > meshTraverser; const MeshPointer& mesh = this->getMesh(); // */ /*meshTraverser.template processBoundaryEntities< BoundaryEntitiesProcessor > ( mesh, Loading src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +2 −2 Original line number Diff line number Diff line Loading @@ -457,13 +457,13 @@ struct LinearSolversBenchmark DistributedVector dist_x0( localRange, matrixPointer->getRows(), group ); DistributedVector dist_b( localRange, matrixPointer->getRows(), group ); // copy the row lengths from the global matrix to the distributed matrix // copy the row capacities from the global matrix to the distributed matrix DistributedRowLengths distributedRowLengths( localRange, matrixPointer->getRows(), group ); for( IndexType i = 0; i < distMatrixPointer->getLocalMatrix().getRows(); i++ ) { const auto gi = distMatrixPointer->getLocalRowRange().getGlobalIndex( i ); distributedRowLengths[ gi ] = matrixPointer->getRowCapacity( gi ); } distMatrixPointer->setCompressedRowLengths( distributedRowLengths ); distMatrixPointer->setRowCapacities( distributedRowLengths ); // copy data from the global matrix/vector into the distributed matrix/vector for( IndexType i = 0; i < distMatrixPointer->getLocalMatrix().getRows(); i++ ) { Loading Loading
CMakeLists.txt +1 −1 Original line number Diff line number Diff line Loading @@ -83,7 +83,7 @@ set( CMAKE_CXX_STANDARD_REQUIRED ON ) set( CMAKE_CXX_EXTENSIONS OFF ) # set default build options set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unused-variable -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wall -Wno-unused-local-typedefs -Wno-unknown-pragmas" ) set( CMAKE_CXX_FLAGS_DEBUG "-g" ) set( CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" ) # pass -rdynamic only in Debug mode Loading
src/Benchmarks/BLAS/array-operations.h +3 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ benchmarkArrayOperations( Benchmark & benchmark, deviceArray2.setSize( size ); #endif Real resultHost, resultDevice; Real resultHost; // reset functions Loading Loading @@ -104,6 +104,7 @@ benchmarkArrayOperations( Benchmark & benchmark, benchmark.setOperation( "comparison (operator==)", 2 * datasetSize ); benchmark.time< Devices::Host >( reset1, "CPU", compareHost ); #ifdef HAVE_CUDA Real resultDevice; auto compareCuda = [&]() { resultDevice = (int) ( deviceArray == deviceArray2 ); }; Loading @@ -118,6 +119,7 @@ benchmarkArrayOperations( Benchmark & benchmark, // copyBasetime is used later inside HAVE_CUDA guard, so the compiler will // complain when compiling without CUDA const double copyBasetime = benchmark.time< Devices::Host >( reset1, "CPU", copyAssignHostHost ); (void)copyBasetime; // ignore unused variable #ifdef HAVE_CUDA auto copyAssignCudaCuda = [&]() { deviceArray = deviceArray2; Loading
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +1 −1 Original line number Diff line number Diff line Loading @@ -241,7 +241,7 @@ struct SpmvBenchmark const auto gi = distributedMatrix.getLocalRowRange().getGlobalIndex( i ); distributedRowLengths[ gi ] = matrix.getRowCapacity( gi ); } distributedMatrix.setCompressedRowLengths( distributedRowLengths ); distributedMatrix.setRowCapacities( distributedRowLengths ); // copy data from the global matrix/vector into the distributed matrix/vector for( IndexType i = 0; i < distributedMatrix.getLocalMatrix().getRows(); i++ ) { Loading
src/Benchmarks/HeatEquation/HeatEquationBenchmarkProblem_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -646,7 +646,6 @@ HeatEquationBenchmarkProblem< Mesh, BoundaryCondition, RightHandSide, Differenti applyBoundaryConditions( const RealType& time, DofVectorPointer& uDofs ) { const MeshPointer& mesh = this->getMesh(); if( this->cudaKernelType == "templated" ) { this->bindDofs( uDofs ); Loading Loading @@ -698,6 +697,7 @@ applyBoundaryConditions( const RealType& time, #endif userData.boundaryConditions = &this->boundaryConditionPointer.template getData< Devices::Cuda >(); Meshes::Traverser< MeshType, Cell > meshTraverser; const MeshPointer& mesh = this->getMesh(); // */ /*meshTraverser.template processBoundaryEntities< BoundaryEntitiesProcessor > ( mesh, Loading
src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +2 −2 Original line number Diff line number Diff line Loading @@ -457,13 +457,13 @@ struct LinearSolversBenchmark DistributedVector dist_x0( localRange, matrixPointer->getRows(), group ); DistributedVector dist_b( localRange, matrixPointer->getRows(), group ); // copy the row lengths from the global matrix to the distributed matrix // copy the row capacities from the global matrix to the distributed matrix DistributedRowLengths distributedRowLengths( localRange, matrixPointer->getRows(), group ); for( IndexType i = 0; i < distMatrixPointer->getLocalMatrix().getRows(); i++ ) { const auto gi = distMatrixPointer->getLocalRowRange().getGlobalIndex( i ); distributedRowLengths[ gi ] = matrixPointer->getRowCapacity( gi ); } distMatrixPointer->setCompressedRowLengths( distributedRowLengths ); distMatrixPointer->setRowCapacities( distributedRowLengths ); // copy data from the global matrix/vector into the distributed matrix/vector for( IndexType i = 0; i < distMatrixPointer->getLocalMatrix().getRows(); i++ ) { Loading