Commit e8600bdc authored by Tomáš Oberhuber
Browse files

Improve the buildAll script: pass the CUDA_ARCHITECTURE and WITH_CUDA variables to cmake instead of hard-coded values.

Small fix in the RgCSR kernel: comment out the debug printf that fired when column == -1.
parent 07502840
Loading
Loading
Loading
Loading
+7 −4
Original line number Diff line number Diff line
#!/bin/bash

# Build configuration for the Debug and Release cmake/make runs below.
# NOTE(review): INSTALL_PREFIX is defined here, but the cmake invocations visible
# in this diff still spell out ${HOME}/local rather than ${INSTALL_PREFIX} - confirm
# whether they were meant to use this variable.
INSTALL_PREFIX=${HOME}/local
# Forwarded to cmake as -DWITH_CUDA.
WITH_CUDA=yes
# Forwarded to cmake as -DCUDA_ARCHITECTURE.
CUDA_ARCHITECTURE=2.0
# NOTE(review): only referenced inside the trailing "#VERBOSE=${VERBOSE}" comment on
# the make lines, so changing it currently has no effect on the build.
VERBOSE=0

# Count of lines containing "processor" in /proc/cpuinfo; used as the -j value for make.
CPUS=`grep -c processor /proc/cpuinfo`

@@ -16,13 +19,13 @@ then
fi

cd Debug
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no
make -j${CPUS} #VERBOSE=1
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA}
make -j${CPUS} #VERBOSE=${VERBOSE}
make -j${CPUS} test
make -j${CPUS} install

cd ../Release
cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=1.2 -DWITH_CUDA=no
make -j${CPUS} #VERBOSE=1
cmake .. -DCMAKE_INSTALL_PREFIX=${HOME}/local -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} -DWITH_CUDA=${WITH_CUDA}
make -j${CPUS} #VERBOSE=${VERBOSE}
make -j${CPUS} test
make -j${CPUS} install
+3 −3
Original line number Diff line number Diff line
@@ -1107,9 +1107,9 @@ __global__ void tnlRgCSRMatrixAdpativeGroupSizeVectorProductKernel( const Index
   for( Index i = threadIndexInRow; i < nonzeros; i += threadsPerRow )
   {
      const Index column = columns[ pos ];
      if( column == -1 )
         printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n",
                rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i );
      //if( column == -1 )
      //   printf( "* rowInMatrix = %d blockIdx. x = %d threadIdx. x = %d threadIndexInRow = %d i = %d \n",
      //          rowInMatrix, blockIndex, threadIdx. x, threadIndexInRow, i );
      if( column != -1 )
         partialSums[ threadIdx. x ] += nonzeroElements[ pos ] * vec_x[ column ];
      //if( rowInMatrix == 0 )