diff --git a/tests/benchmarks/tnl-cuda-benchmarks.h b/tests/benchmarks/tnl-cuda-benchmarks.h
index 17ae5c04b3591585f4d66475cc2ec45e017afbad..2c11b5bc7636929c68112b62918a426787574f1b 100644
--- a/tests/benchmarks/tnl-cuda-benchmarks.h
+++ b/tests/benchmarks/tnl-cuda-benchmarks.h
@@ -156,7 +156,7 @@ int main( int argc, char* argv[] )
      resultHost = hostVector.lpNorm( 2.0 );
    timer.stop();
    timeHost = timer.getTime();
-   bandwidth = 2 * datasetSize / timer.getTime();
+   bandwidth = datasetSize / timer.getTime();
    cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
     
    cout << "Benchmarking lpNorm on GPU: ";
@@ -166,7 +166,7 @@ int main( int argc, char* argv[] )
       resultDevice = deviceVector.lpNorm( 2.0 );
    timer.stop();
    timeDevice = timer.getTime();
-   bandwidth = 2 * datasetSize / timer.getTime();
+   bandwidth = datasetSize / timer.getTime();
    cout << "bandwidth: " << bandwidth << " GB/sec, time: " << timer.getTime() << " sec." << endl;
    cout << "CPU/GPU speedup: " << timeHost / timeDevice << endl;