Loading buildAll +7 −4 Original line number Diff line number Diff line #!/bin/bash INSTALL_PREFIX=${HOME}/local WITH_CUDA=yes CUDA_ARCHITECTURE=2.0 VERBOSE=0 CPUS=`grep -c processor /proc/cpuinfo` Loading @@ -16,13 +19,13 @@ then fi cd Debug cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no make -j${CPUS} #VERBOSE=1 cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} make -j${CPUS} #VERBOSE=${VERBOSE} make -j${CPUS} test make -j${CPUS} install cd ../Release cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no make -j${CPUS} #VERBOSE=1 cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} make -j${CPUS} #VERBOSE=${VERBOSE} make -j${CPUS} test make -j${CPUS} install src/matrix/tnlRgCSRMatrix.h +3 −3 Original line number Diff line number Diff line Loading @@ -1107,9 +1107,9 @@ __global__ void tnlRgCSRMatrixAdpativeGroupSizeVectorProductKernel( const Index for( Index i = threadIndexInRow; i < nonzeros; i += threadsPerRow ) { const Index column = columns[ pos ]; if( column == -1 ) printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n", rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i ); //if( column == -1 ) // printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n", // rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i ); if( column != -1 ) partialSums[ threadIdx. x ] += nonzeroElements[ pos ] * vec_x[ column ]; //if( rowInMatrix == 0 ) Loading Loading
buildAll +7 −4 Original line number Diff line number Diff line #!/bin/bash INSTALL_PREFIX=${HOME}/local WITH_CUDA=yes CUDA_ARCHITECTURE=2.0 VERBOSE=0 CPUS=`grep -c processor /proc/cpuinfo` Loading @@ -16,13 +19,13 @@ then fi cd Debug cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no make -j${CPUS} #VERBOSE=1 cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} make -j${CPUS} #VERBOSE=${VERBOSE} make -j${CPUS} test make -j${CPUS} install cd ../Release cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no make -j${CPUS} #VERBOSE=1 cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA} make -j${CPUS} #VERBOSE=${VERBOSE} make -j${CPUS} test make -j${CPUS} install
src/matrix/tnlRgCSRMatrix.h +3 −3 Original line number Diff line number Diff line Loading @@ -1107,9 +1107,9 @@ __global__ void tnlRgCSRMatrixAdpativeGroupSizeVectorProductKernel( const Index for( Index i = threadIndexInRow; i < nonzeros; i += threadsPerRow ) { const Index column = columns[ pos ]; if( column == -1 ) printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n", rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i ); //if( column == -1 ) // printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n", // rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i ); if( column != -1 ) partialSums[ threadIdx. x ] += nonzeroElements[ pos ] * vec_x[ column ]; //if( rowInMatrix == 0 ) Loading