Loading CMakeLists.txt +12 −2 Original line number Diff line number Diff line Loading @@ -33,7 +33,11 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) endif() if( WITH_CUDA STREQUAL "yes" ) <<<<<<< HEAD AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" ) ======= AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 -shared" ) >>>>>>> 5f49d192b9684738aef0eb832461b54b25cadda3 else() AddCompilerFlag( "-std=gnu++0x" ) endif() Loading @@ -49,12 +53,18 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) <<<<<<< HEAD set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) ======= set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--compiler-options '-fPIC','-shared') if( CUDA_ARCHITECTURE STREQUAL "2.0" ) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch sm_20;-DCUDA_ARCH=20) >>>>>>> 5f49d192b9684738aef0eb832461b54b25cadda3 set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() if( CUDA_ARCHITECTURE STREQUAL "2.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_21;-DCUDA_ARCH=21) endif() if( CUDA_ARCHITECTURE STREQUAL "2.2" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_22;-DCUDA_ARCH=22) Loading @@ -63,7 +73,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_23;-DCUDA_ARCH=23) endif() if( CUDA_ARCHITECTURE STREQUAL "3.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_30;-DCUDA_ARCH=30) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_30;-code=sm_30;-DCUDA_ARCH=30) endif() #### Loading install +12 −10 Original line number Diff line number Diff line Loading @@ -10,6 +10,8 @@ VERBOSE="VERBOSE=1" CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS="1" echo "Building $TARGET using $CPUS processors." Loading @@ -34,14 +36,14 @@ make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install #cd ../Release #${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ # -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ # -DWITH_CUDA=${WITH_CUDA} \ # -DWITH_CUSPARSE=${WITH_CUSPARSE} \ # -DPETSC_DIR=${PETSC_DIR} \ # -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} #make -j${CPUS} ${VERBOSE} #make -j${CPUS} test #make -j${CPUS} install src/implementation/core/cuda/cuda-reduction-abs-max_impl.cu +1 −0 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > ( const tnlParallelReductionAbsMax< char, long int >& operation, const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, Loading tests/benchmarks/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -10,7 +10,8 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu OPTIONS -arch sm_20 -shared ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ELSE() ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) Loading tests/unit-tests/CMakeLists.txt +4 −2 Original line number Diff line number Diff line Loading @@ -18,11 +18,13 @@ set( headers tnlUnitTestStarter.h solver/tnlMersonSolverTester.h ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cu ) CUDA_ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cu OPTIONS -arch sm_20 -shared ) TARGET_LINK_LIBRARIES( tnl-unit-tests${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) else() ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cpp ) ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cpp OPTIONS -arch sm_20 -shared ) TARGET_LINK_LIBRARIES( tnl-unit-tests${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) endif() Loading Loading
CMakeLists.txt +12 −2 Original line number Diff line number Diff line Loading @@ -33,7 +33,11 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" ) endif() if( WITH_CUDA STREQUAL "yes" ) <<<<<<< HEAD AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" ) ======= AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 -shared" ) >>>>>>> 5f49d192b9684738aef0eb832461b54b25cadda3 else() AddCompilerFlag( "-std=gnu++0x" ) endif() Loading @@ -49,12 +53,18 @@ if( NOT WITH_CUDA STREQUAL "no" ) set(BUILD_SHARED_LIBS ON) set(CUDA_SEPARABLE_COMPILATION ON) <<<<<<< HEAD set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA) if( CUDA_ARCHITECTURE STREQUAL "2.0" ) ======= set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--compiler-options '-fPIC','-shared') if( CUDA_ARCHITECTURE STREQUAL "2.0" ) #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch sm_20;-DCUDA_ARCH=20) >>>>>>> 5f49d192b9684738aef0eb832461b54b25cadda3 set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20) endif() if( CUDA_ARCHITECTURE STREQUAL "2.1" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_21;-DCUDA_ARCH=21) endif() if( CUDA_ARCHITECTURE STREQUAL "2.2" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_22;-DCUDA_ARCH=22) Loading @@ -63,7 +73,7 @@ if( NOT WITH_CUDA STREQUAL "no" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_23;-DCUDA_ARCH=23) endif() if( CUDA_ARCHITECTURE STREQUAL "3.0" ) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_30;-DCUDA_ARCH=30) set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_30;-code=sm_30;-DCUDA_ARCH=30) endif() #### Loading
install +12 −10 Original line number Diff line number Diff line Loading @@ -10,6 +10,8 @@ VERBOSE="VERBOSE=1" CMAKE="cmake" CPUS=`grep -c processor /proc/cpuinfo` CPUS="1" echo "Building $TARGET using $CPUS processors." Loading @@ -34,14 +36,14 @@ make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install cd ../Release ${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ -DWITH_CUDA=${WITH_CUDA} \ -DWITH_CUSPARSE=${WITH_CUSPARSE} \ -DPETSC_DIR=${PETSC_DIR} \ -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} make -j${CPUS} ${VERBOSE} make -j${CPUS} test make -j${CPUS} install #cd ../Release #${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \ # -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \ # -DWITH_CUDA=${WITH_CUDA} \ # -DWITH_CUSPARSE=${WITH_CUSPARSE} \ # -DPETSC_DIR=${PETSC_DIR} \ # -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION} #make -j${CPUS} ${VERBOSE} #make -j${CPUS} test #make -j${CPUS} install
src/implementation/core/cuda/cuda-reduction-abs-max_impl.cu +1 −0 Original line number Diff line number Diff line Loading @@ -59,6 +59,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2, typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result ); template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > > ( const tnlParallelReductionAbsMax< char, long int >& operation, const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size, Loading
tests/benchmarks/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -10,7 +10,8 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu ) CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu OPTIONS -arch sm_20 -shared ) SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" ) ELSE() ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp ) Loading
tests/unit-tests/CMakeLists.txt +4 −2 Original line number Diff line number Diff line Loading @@ -18,11 +18,13 @@ set( headers tnlUnitTestStarter.h solver/tnlMersonSolverTester.h ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cu ) CUDA_ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cu OPTIONS -arch sm_20 -shared ) TARGET_LINK_LIBRARIES( tnl-unit-tests${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) else() ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cpp ) ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cpp OPTIONS -arch sm_20 -shared ) TARGET_LINK_LIBRARIES( tnl-unit-tests${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES} tnl${mpiExt}${debugExt}-0.1 ) endif() Loading