Loading tests/benchmarks/vector-operations.h +134 −28 Original line number Diff line number Diff line Loading @@ -64,12 +64,48 @@ benchmarkVectorOperations( const int & loops, reset12(); cout << "Benchmarking CPU-CPU memory transfer:" << endl; auto copyAssignHostHost = [&]() { hostVector = hostVector2; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssignHostHost, trueFunc, reset1 ); cout << "Benchmarking CPU-GPU memory transfer:" << endl; auto copyAssign = [&]() { auto copyAssignHostCuda = [&]() { deviceVector = hostVector; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssignHostCuda, compare1, reset1 ); cout << "Benchmarking GPU-GPU memory transfer:" << endl; auto copyAssignCudaCuda = [&]() { deviceVector = hostVector; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssign, compare1, reset1 ); benchmarkSingle( loops, datasetSize, copyAssignCudaCuda, trueFunc, reset1 ); cout << endl; cout << "Benchmarking tnlVector.operator==" << endl; auto compareHost = [&]() { resultHost = (int) hostVector == hostVector2; }; auto compareCuda = [&]() { resultDevice = (int) deviceVector == deviceVector2; }; benchmarkCuda( loops, 2 * datasetSize, compareHost, compareCuda, compareScalars, voidFunc ); cout << "Benchmarking scalar multiplication:" << endl; auto multiplyHost = [&]() { hostVector *= 0.5; }; auto multiplyCuda = [&]() { deviceVector *= 0.5; }; benchmarkCuda( loops, 2 * datasetSize, multiplyHost, multiplyCuda, compare1, reset1 ); cout << "Benchmarking vector addition:" << endl; Loading @@ -82,6 +118,86 @@ benchmarkVectorOperations( const int & loops, benchmarkCuda( loops, 3 * datasetSize, addVectorHost, addVectorCuda, compare1, reset1 ); cout << "Benchmarking max:" << endl; auto maxHost = [&]() { resultHost = hostVector.max(); }; auto maxCuda = [&]() { resultDevice = deviceVector.max(); }; benchmarkCuda( loops, datasetSize, maxHost, maxCuda, compareScalars, voidFunc ); cout << "Benchmarking min:" << endl; auto minHost = [&]() { resultHost = hostVector.min(); }; auto minCuda = [&]() { resultDevice = deviceVector.min(); }; benchmarkCuda( loops, datasetSize, minHost, minCuda, compareScalars, voidFunc ); cout << "Benchmarking absMax:" << endl; auto absMaxHost = [&]() { resultHost = hostVector.absMax(); }; auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); }; benchmarkCuda( loops, datasetSize, absMaxHost, absMaxCuda, compareScalars, voidFunc ); cout << "Benchmarking absMin:" << endl; auto absMinHost = [&]() { resultHost = hostVector.absMin(); }; auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; benchmarkCuda( loops, datasetSize, absMinHost, absMinCuda, compareScalars, voidFunc ); cout << "Benchmarking sum:" << endl; auto sumHost = [&]() { resultHost = hostVector.sum(); }; auto sumCuda = [&]() { resultDevice = deviceVector.sum(); }; benchmarkCuda( loops, datasetSize, sumHost, sumCuda, compareScalars, voidFunc ); cout << "Benchmarking l1 norm: " << endl; auto l1normHost = [&]() { resultHost = hostVector.lpNorm( 1.0 ); }; auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; benchmarkCuda( loops, datasetSize, l1normHost, l1normCuda, compareScalars, voidFunc ); cout << "Benchmarking l2 norm: " << endl; auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; benchmarkCuda( loops, datasetSize, l2normHost, l2normCuda, compareScalars, voidFunc ); cout << "Benchmarking l3 norm: " << endl; auto l3normHost = [&]() { resultHost = hostVector.lpNorm( 3.0 ); }; auto l3normCuda = [&]() { resultDevice = deviceVector.lpNorm( 3.0 ); }; benchmarkCuda( loops, datasetSize, l3normHost, l3normCuda, compareScalars, voidFunc ); cout << "Benchmarking scalar product:" << endl; auto scalarProductHost = [&]() { resultHost = hostVector.scalarProduct( hostVector2 ); Loading Loading @@ -111,16 +227,6 @@ benchmarkVectorOperations( const int & loops, #endif */ cout << "Benchmarking L2 norm: " << endl; auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; benchmarkCuda( loops, datasetSize, l2normHost, l2normCuda, compareScalars, voidFunc ); /* cout << "Benchmarking prefix-sum:" << endl; timer.reset(); Loading Loading
tests/benchmarks/vector-operations.h +134 −28 Original line number Diff line number Diff line Loading @@ -64,12 +64,48 @@ benchmarkVectorOperations( const int & loops, reset12(); cout << "Benchmarking CPU-CPU memory transfer:" << endl; auto copyAssignHostHost = [&]() { hostVector = hostVector2; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssignHostHost, trueFunc, reset1 ); cout << "Benchmarking CPU-GPU memory transfer:" << endl; auto copyAssign = [&]() { auto copyAssignHostCuda = [&]() { deviceVector = hostVector; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssignHostCuda, compare1, reset1 ); cout << "Benchmarking GPU-GPU memory transfer:" << endl; auto copyAssignCudaCuda = [&]() { deviceVector = hostVector; }; cout << " "; benchmarkSingle( loops, datasetSize, copyAssign, compare1, reset1 ); benchmarkSingle( loops, datasetSize, copyAssignCudaCuda, trueFunc, reset1 ); cout << endl; cout << "Benchmarking tnlVector.operator==" << endl; auto compareHost = [&]() { resultHost = (int) hostVector == hostVector2; }; auto compareCuda = [&]() { resultDevice = (int) deviceVector == deviceVector2; }; benchmarkCuda( loops, 2 * datasetSize, compareHost, compareCuda, compareScalars, voidFunc ); cout << "Benchmarking scalar multiplication:" << endl; auto multiplyHost = [&]() { hostVector *= 0.5; }; auto multiplyCuda = [&]() { deviceVector *= 0.5; }; benchmarkCuda( loops, 2 * datasetSize, multiplyHost, multiplyCuda, compare1, reset1 ); cout << "Benchmarking vector addition:" << endl; Loading @@ -82,6 +118,86 @@ benchmarkVectorOperations( const int & loops, benchmarkCuda( loops, 3 * datasetSize, addVectorHost, addVectorCuda, compare1, reset1 ); cout << "Benchmarking max:" << endl; auto maxHost = [&]() { resultHost = hostVector.max(); }; auto maxCuda = [&]() { resultDevice = deviceVector.max(); }; benchmarkCuda( loops, datasetSize, maxHost, maxCuda, compareScalars, voidFunc ); cout << "Benchmarking min:" << endl; auto minHost = [&]() { resultHost = hostVector.min(); }; auto minCuda = [&]() { resultDevice = deviceVector.min(); }; benchmarkCuda( loops, datasetSize, minHost, minCuda, compareScalars, voidFunc ); cout << "Benchmarking absMax:" << endl; auto absMaxHost = [&]() { resultHost = hostVector.absMax(); }; auto absMaxCuda = [&]() { resultDevice = deviceVector.absMax(); }; benchmarkCuda( loops, datasetSize, absMaxHost, absMaxCuda, compareScalars, voidFunc ); cout << "Benchmarking absMin:" << endl; auto absMinHost = [&]() { resultHost = hostVector.absMin(); }; auto absMinCuda = [&]() { resultDevice = deviceVector.absMin(); }; benchmarkCuda( loops, datasetSize, absMinHost, absMinCuda, compareScalars, voidFunc ); cout << "Benchmarking sum:" << endl; auto sumHost = [&]() { resultHost = hostVector.sum(); }; auto sumCuda = [&]() { resultDevice = deviceVector.sum(); }; benchmarkCuda( loops, datasetSize, sumHost, sumCuda, compareScalars, voidFunc ); cout << "Benchmarking l1 norm: " << endl; auto l1normHost = [&]() { resultHost = hostVector.lpNorm( 1.0 ); }; auto l1normCuda = [&]() { resultDevice = deviceVector.lpNorm( 1.0 ); }; benchmarkCuda( loops, datasetSize, l1normHost, l1normCuda, compareScalars, voidFunc ); cout << "Benchmarking l2 norm: " << endl; auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; benchmarkCuda( loops, datasetSize, l2normHost, l2normCuda, compareScalars, voidFunc ); cout << "Benchmarking l3 norm: " << endl; auto l3normHost = [&]() { resultHost = hostVector.lpNorm( 3.0 ); }; auto l3normCuda = [&]() { resultDevice = deviceVector.lpNorm( 3.0 ); }; benchmarkCuda( loops, datasetSize, l3normHost, l3normCuda, compareScalars, voidFunc ); cout << "Benchmarking scalar product:" << endl; auto scalarProductHost = [&]() { resultHost = hostVector.scalarProduct( hostVector2 ); Loading Loading @@ -111,16 +227,6 @@ benchmarkVectorOperations( const int & loops, #endif */ cout << "Benchmarking L2 norm: " << endl; auto l2normHost = [&]() { resultHost = hostVector.lpNorm( 2.0 ); }; auto l2normCuda = [&]() { resultDevice = deviceVector.lpNorm( 2.0 ); }; benchmarkCuda( loops, datasetSize, l2normHost, l2normCuda, compareScalars, voidFunc ); /* cout << "Benchmarking prefix-sum:" << endl; timer.reset(); Loading