Loading src/matrices/tnlCSRMatrix_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -672,7 +672,7 @@ void tnlCSRMatrix< Real, Device, Index >::spmvCudaVectorized( const InVector& in if( inWarpIdx < 2 ) aux[ threadIdx.x ] += aux[ threadIdx.x + 2 ]; if( warpSize >= 2 ) if( inWarpIdx < 1 ) aux[ threadIdx.x ] += aux[ threadIdx.x + 1 ]; __syncthreads(); // TODO: I am not sure why __syncthreads(); // TODO: I am not sure why - aux must be volatile if( inWarpIdx == 0 ) outVector[ row ] = aux[ threadIdx.x ]; Loading tests/benchmarks/tnl-benchmark-spmv.h +2 −2 Original line number Diff line number Diff line Loading @@ -393,7 +393,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) cusparseDestroy( cusparseHandle ); cout << " done. \r"; cudaCSRMatrix.setCudaKernelType( CSRMatrixCudaType::scalar ); /*cudaCSRMatrix.setCudaKernelType( CSRMatrixCudaType::scalar ); benchmarkMatrix( cudaCSRMatrix, cudaX, cudaB, Loading Loading @@ -524,7 +524,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, baseline, verbose, logFile ); logFile );*/ } cudaCSRMatrix.reset(); #endif Loading Loading
src/matrices/tnlCSRMatrix_impl.h +1 −1 Original line number Diff line number Diff line Loading @@ -672,7 +672,7 @@ void tnlCSRMatrix< Real, Device, Index >::spmvCudaVectorized( const InVector& in if( inWarpIdx < 2 ) aux[ threadIdx.x ] += aux[ threadIdx.x + 2 ]; if( warpSize >= 2 ) if( inWarpIdx < 1 ) aux[ threadIdx.x ] += aux[ threadIdx.x + 1 ]; __syncthreads(); // TODO: I am not sure why __syncthreads(); // TODO: I am not sure why - aux must be volatile if( inWarpIdx == 0 ) outVector[ row ] = aux[ threadIdx.x ]; Loading
tests/benchmarks/tnl-benchmark-spmv.h +2 −2 Original line number Diff line number Diff line Loading @@ -393,7 +393,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) cusparseDestroy( cusparseHandle ); cout << " done. \r"; cudaCSRMatrix.setCudaKernelType( CSRMatrixCudaType::scalar ); /*cudaCSRMatrix.setCudaKernelType( CSRMatrixCudaType::scalar ); benchmarkMatrix( cudaCSRMatrix, cudaX, cudaB, Loading Loading @@ -524,7 +524,7 @@ bool setupBenchmark( const tnlParameterContainer& parameters ) stopTime, baseline, verbose, logFile ); logFile );*/ } cudaCSRMatrix.reset(); #endif Loading