Loading src/Benchmarks/Traversers/GridTraversersBenchmark.h +2 −2 Original line number Diff line number Diff line Loading @@ -105,7 +105,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > void writeOneUsingParallelFor() { Index _size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Real* data ) auto f = [=] __cuda_callable__ ( Index j, Index i, Real* data ) { data[ i * _size + j ] = 1.0; }; Loading Loading @@ -146,7 +146,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index > void writeOneUsingParallelFor() { Index _size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data ) auto f = [=] __cuda_callable__ ( Index k, Index j, Index i, Real* data ) { data[ ( i * _size + j ) * _size + k ] = 1.0; }; Loading src/Benchmarks/Traversers/tnl-benchmark-traversers.h +1 −1 Original line number Diff line number Diff line Loading @@ -64,7 +64,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters, cudaTraverserBenchmark.writeOneUsingParallelFor(); }; benchmark.setOperation( "write 1 using parallel for", size * sizeof( Real ) / oneGB ); benchmark.setOperation( "write 1 using parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.time( reset, "CPU", hostWriteOneUsingParallelFor ); #ifdef HAVE_CUDA benchmark.time( reset, "GPU", cudaWriteOneUsingParallelFor ); Loading Loading
src/Benchmarks/Traversers/GridTraversersBenchmark.h +2 −2 Original line number Diff line number Diff line Loading @@ -105,7 +105,7 @@ class GridTraversersBenchmark< 2, Device, Real, Index > void writeOneUsingParallelFor() { Index _size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Real* data ) auto f = [=] __cuda_callable__ ( Index j, Index i, Real* data ) { data[ i * _size + j ] = 1.0; }; Loading Loading @@ -146,7 +146,7 @@ class GridTraversersBenchmark< 3, Device, Real, Index > void writeOneUsingParallelFor() { Index _size = this->size; auto f = [=] __cuda_callable__ ( Index i, Index j, Index k, Real* data ) auto f = [=] __cuda_callable__ ( Index k, Index j, Index i, Real* data ) { data[ ( i * _size + j ) * _size + k ] = 1.0; }; Loading
src/Benchmarks/Traversers/tnl-benchmark-traversers.h +1 −1 Original line number Diff line number Diff line Loading @@ -64,7 +64,7 @@ bool runBenchmark( const Config::ParameterContainer& parameters, cudaTraverserBenchmark.writeOneUsingParallelFor(); }; benchmark.setOperation( "write 1 using parallel for", size * sizeof( Real ) / oneGB ); benchmark.setOperation( "write 1 using parallel for", pow( ( double ) size, ( double ) Dimension ) * sizeof( Real ) / oneGB ); benchmark.time( reset, "CPU", hostWriteOneUsingParallelFor ); #ifdef HAVE_CUDA benchmark.time( reset, "GPU", cudaWriteOneUsingParallelFor ); Loading