Loading src/matrix/tnlAdaptiveRgCSRMatrix.h +5 −5 Original line number Diff line number Diff line Loading @@ -392,9 +392,9 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr Index threadsLeft = cudaBlockSize - usedThreads; dbgExpr( usedThreads ); dbgExpr( threadsLeft ); for( Index i = 0; i < threadsLeft; i++) threadsPerRow[ i % rowsInGroup ] ++; /*while( usedThreads < cudaBlockSize ) //for( Index i = 0; i < threadsLeft; i++) // threadsPerRow[ i % rowsInGroup ] ++; while( usedThreads < cudaBlockSize ) { Index maxChunkSize( 0 ); for( Index row = groupBegin; row < groupEnd; row ++ ) Loading @@ -417,7 +417,7 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr usedThreads ++; } } }*/ } /**** * Compute prefix-sum on threadsPerRow and store it in threads Loading Loading @@ -862,7 +862,7 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo sizeof( tnlARGCSRGroupProperties ) + blockDim. x * sizeof( int ); //cudaThreadSetCacheConfig( cudaFuncCachePreferL1 ); cudaThreadSetCacheConfig( cudaFuncCachePreferL1 ); AdaptiveRgCSRMatrixVectorProductKernel< Real, Index > <<< gridDim, blockDim, allocatedSharedMemory >>> ( result. getVector(), Loading tests/benchmarks/share/draw-matrices +3 −3 Original line number Diff line number Diff line Loading @@ -2,9 +2,9 @@ VERBOSE="yes" #PWD=`pwd` TNL_MATRIX_DRAW="tnlgrid-view" #IWD="$PWD" PWD=`pwd` TNL_MATRIX_DRAW="tnl-grid-view" IWD="$PWD" #source ../tnl-env-variables #export TNL_SPARSE_MATRIX_CHECK_CFG_DESC_FILE="$TNL_SOURCE_DIR/tests/tnl-sparse-matrix-check.cfg.desc" Loading Loading
src/matrix/tnlAdaptiveRgCSRMatrix.h +5 −5 Original line number Diff line number Diff line Loading @@ -392,9 +392,9 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr Index threadsLeft = cudaBlockSize - usedThreads; dbgExpr( usedThreads ); dbgExpr( threadsLeft ); for( Index i = 0; i < threadsLeft; i++) threadsPerRow[ i % rowsInGroup ] ++; /*while( usedThreads < cudaBlockSize ) //for( Index i = 0; i < threadsLeft; i++) // threadsPerRow[ i % rowsInGroup ] ++; while( usedThreads < cudaBlockSize ) { Index maxChunkSize( 0 ); for( Index row = groupBegin; row < groupEnd; row ++ ) Loading @@ -417,7 +417,7 @@ bool tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: copyFrom( const tnlCSRMatr usedThreads ++; } } }*/ } /**** * Compute prefix-sum on threadsPerRow and store it in threads Loading Loading @@ -862,7 +862,7 @@ void tnlAdaptiveRgCSRMatrix< Real, Device, Index > :: vectorProduct( const tnlLo sizeof( tnlARGCSRGroupProperties ) + blockDim. x * sizeof( int ); //cudaThreadSetCacheConfig( cudaFuncCachePreferL1 ); cudaThreadSetCacheConfig( cudaFuncCachePreferL1 ); AdaptiveRgCSRMatrixVectorProductKernel< Real, Index > <<< gridDim, blockDim, allocatedSharedMemory >>> ( result. getVector(), Loading
tests/benchmarks/share/draw-matrices +3 −3 Original line number Diff line number Diff line Loading @@ -2,9 +2,9 @@ VERBOSE="yes" #PWD=`pwd` TNL_MATRIX_DRAW="tnlgrid-view" #IWD="$PWD" PWD=`pwd` TNL_MATRIX_DRAW="tnl-grid-view" IWD="$PWD" #source ../tnl-env-variables #export TNL_SPARSE_MATRIX_CHECK_CFG_DESC_FILE="$TNL_SOURCE_DIR/tests/tnl-sparse-matrix-check.cfg.desc" Loading