Commit f441e221 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'matrices-gpu' of...

Merge branch 'matrices-gpu' of ssh://geraldine.fjfi.cvut.cz:2222/local/projects/tnl/tnl into matrices-gpu

Conflicts:
	CMakeLists.txt
	src/CMakeLists.txt
parents 15a59c5f 5f49d192
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -33,7 +33,11 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" )
endif()   

# Choose the global compiler flags depending on whether CUDA is enabled.
#
# The merge-conflict markers that had been committed around the CUDA branch are
# not valid CMake, so the conflict is resolved to a single AddCompilerFlag call.
# The incoming side of the conflict additionally passed -shared; that option
# selects the kind of output produced at link time and is not a per-source
# compile flag, so it is left out here.
# NOTE(review): confirm with the author of 5f49d192 that -shared is not needed
# in the global flags.
if( WITH_CUDA STREQUAL "yes" )
   # CUDA build: define HAVE_NOT_CXX11 and undefine two libstdc++ feature macros.
   AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" )
else()
   # Host-only build: compile as GNU C++0x.
   AddCompilerFlag( "-std=gnu++0x" )
endif()
@@ -49,12 +53,18 @@ if( NOT WITH_CUDA STREQUAL "no" )
        # Build libraries as shared objects and turn on separable compilation
        # for the CUDA sources.
        set(BUILD_SHARED_LIBS ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
        
        # Every CUDA translation unit sees HAVE_CUDA.
        # The committed merge-conflict markers are removed and the conflict is
        # resolved to one variant. The incoming variant also forwarded
        # '-fPIC','-shared' to the host compiler through --compiler-options.
        # NOTE(review): BUILD_SHARED_LIBS is already ON above; confirm that the
        # extra host options are not required before dropping them for good.
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA)
        if( CUDA_ARCHITECTURE STREQUAL "2.0" )
            # Only the CUDA_ARCH define is added for 2.0; no -arch option is
            # passed in this branch.
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)
        endif()    
        # Architecture 2.1: generate sm_21 code from the compute_20 virtual
        # architecture and expose the choice to the sources as CUDA_ARCH=21.
        # A single set() is kept: appending both "-arch=sm_21" and
        # "-arch=compute_20" would hand nvcc two -arch values and define
        # CUDA_ARCH twice.
        if( CUDA_ARCHITECTURE STREQUAL "2.1" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_21;-DCUDA_ARCH=21)
        endif()    
        if( CUDA_ARCHITECTURE STREQUAL "2.2" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_22;-DCUDA_ARCH=22)
@@ -63,7 +73,7 @@ if( NOT WITH_CUDA STREQUAL "no" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_23;-DCUDA_ARCH=23)
        endif()    
        # Architecture 3.0: generate sm_30 code from the compute_30 virtual
        # architecture and expose the choice to the sources as CUDA_ARCH=30.
        # A single set() is kept: appending both "-arch=sm_30" and
        # "-arch=compute_30" would hand nvcc two -arch values and define
        # CUDA_ARCH twice.
        if( CUDA_ARCHITECTURE STREQUAL "3.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_30;-code=sm_30;-DCUDA_ARCH=30)
        endif()    
    
        ####
+12 −10
Original line number Diff line number Diff line
@@ -10,6 +10,8 @@ VERBOSE="VERBOSE=1"

# Name of the cmake executable invoked for the configure steps.
CMAKE="cmake"
# Count the lines of /proc/cpuinfo that contain "processor"; the result is
# used as the job count for make.
CPUS=`grep -c processor /proc/cpuinfo`
# NOTE(review): this unconditionally overrides the detected count, so every
# "make -j${CPUS}" runs with a single job and the grep above is dead code.
# Confirm whether the serial build is intentional or a debugging leftover.
CPUS="1"


# Report the target and the job count that will be used.
echo "Building $TARGET using $CPUS processors."

@@ -34,14 +36,14 @@ make -j${CPUS} ${VERBOSE}
make -j${CPUS} test
make -j${CPUS} install

# Configure, build, test and install the Release configuration.
# Stop if the Release directory cannot be entered; otherwise cmake and make
# would run in whatever directory the script happens to be in.
cd ../Release || exit 1
# Each -D argument is quoted so that a value containing whitespace is still
# passed to cmake as a single argument.
${CMAKE} .. "-DCMAKE_INSTALL_PREFIX=${HOME}/local" \
            "-DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE}" \
            "-DWITH_CUDA=${WITH_CUDA}" \
            "-DWITH_CUSPARSE=${WITH_CUSPARSE}" \
            "-DPETSC_DIR=${PETSC_DIR}" \
            "-DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION}"
# ${VERBOSE} is deliberately left unquoted: when it is empty it must vanish
# instead of becoming an empty argument to make.
make -j${CPUS} ${VERBOSE}
make -j${CPUS} test
make -j${CPUS} install
#cd ../Release
#${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \
#            -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \
#            -DWITH_CUDA=${WITH_CUDA} \
#            -DWITH_CUSPARSE=${WITH_CUSPARSE} \
#            -DPETSC_DIR=${PETSC_DIR} \
#            -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION}
#make -j${CPUS} ${VERBOSE}
#make -j${CPUS} test
#make -j${CPUS} install
+1 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in
                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
                                     

template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
                                   ( const tnlParallelReductionAbsMax< char, long int >& operation,
                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
+2 −1
Original line number Diff line number Diff line
@@ -10,7 +10,8 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h
     

IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu )
    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu
                         OPTIONS -arch sm_20 -shared )
    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )        
ELSE()
    ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp )
+4 −2
Original line number Diff line number Diff line
@@ -18,11 +18,13 @@ set( headers tnlUnitTestStarter.h
             solver/tnlMersonSolverTester.h )
             
# Unit-test executable: built from the .cu entry point when CUDA is enabled,
# otherwise from the plain C++ entry point.
#
# Each branch defines the target exactly once; defining the same target name
# twice makes CMake stop with an error. The host branch no longer passes
# "OPTIONS -arch sm_20 -shared": ADD_EXECUTABLE has no OPTIONS keyword, so
# those words would be treated as source file names.
# NOTE(review): the CUDA branch also drops the hard-coded "-arch sm_20 -shared"
# nvcc options, because a fixed architecture cannot match every value of
# CUDA_ARCHITECTURE and -shared does not apply to an executable. Confirm that
# the flags collected in CUDA_NVCC_FLAGS are sufficient for all targets.
if( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cu )
else()
    ADD_EXECUTABLE( tnl-unit-tests${mpiExt}${debugExt} ${headers} tnl-unit-tests.cpp )
endif()
# Both variants link against CppUnit and the TNL library, so the link step is
# shared instead of being repeated in each branch.
TARGET_LINK_LIBRARIES( tnl-unit-tests${mpiExt}${debugExt} ${CPPUNIT_LIBRARIES}
                                                          tnl${mpiExt}${debugExt}-0.1 )
Loading