Commit 5f49d192 authored by Tomáš Oberhuber
Browse files

Working on the separate compilation with nvcc.

parent 03332aa8
Loading
Loading
Loading
Loading
+5 −6
Original line number Diff line number Diff line
@@ -33,8 +33,7 @@ if( WITH_TEMPLATE_EXPLICIT_INSTANTIATION STREQUAL "yes" )
endif()   

# Select extra compiler flags depending on whether WITH_CUDA is the string "yes".
# AddCompilerFlag is not defined in this excerpt; presumably a project macro
# that appends to the host compiler flags - confirm against its definition.
# NOTE(review): this listing is a diff view without +/- markers, so the two
# active AddCompilerFlag calls in the CUDA branch may be the old and the new
# form of a single line rather than two successive calls - confirm.
if( WITH_CUDA STREQUAL "yes" )
   #AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 --relocatable-device-code=yes --device-c" )
   # CUDA build: define HAVE_NOT_CXX11 and undefine _GLIBCXX_ATOMIC_BUILTINS
   # and _GLIBCXX_USE_INT128.
   AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128" )
   # Same define/undefine set with -shared appended.
   AddCompilerFlag( "-DHAVE_NOT_CXX11 -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 -shared" )
else()
   # Non-CUDA build: compile in GNU C++0x mode.
   AddCompilerFlag( "-std=gnu++0x" )
endif()      
@@ -50,13 +49,13 @@ if( NOT WITH_CUDA STREQUAL "no" )
        # Switch on shared-library builds and CUDA_SEPARABLE_COMPILATION
        # (presumably the FindCUDA option for separate device compilation - confirm).
        set(BUILD_SHARED_LIBS ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
        
        # Append -DHAVE_CUDA and host-compiler options to CUDA_NVCC_FLAGS.
        # NOTE(review): both set() calls below append to the same list; in this
        # marker-less diff listing they may be the before/after forms of one
        # line rather than two successive appends - confirm.
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--shared;--compiler-options -fPIC)
        # NOTE(review): single quotes are not a quoting construct in CMake, so
        # the quote characters stay part of the argument text; verify what
        # nvcc (and the host compiler behind it) actually receives.
        set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DHAVE_CUDA;--compiler-options '-fPIC','-shared')
        # CUDA_ARCHITECTURE "2.0": only the CUDA_ARCH=20 define is appended.
        # The two commented-out variants additionally passed explicit
        # -arch/-code options to nvcc; they are disabled here.
        if( CUDA_ARCHITECTURE STREQUAL "2.0" )
            #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_20;-DCUDA_ARCH=20)
            #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch sm_20;-DCUDA_ARCH=20)
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DCUDA_ARCH=20)            
        endif()    
        # CUDA_ARCHITECTURE "2.1": append architecture options and the
        # CUDA_ARCH=21 define to CUDA_NVCC_FLAGS.
        # NOTE(review): two set() calls append here (first -arch=sm_21, then
        # -arch=compute_20 with -code=sm_21). In this marker-less diff listing
        # they may be the old and new form of one line; if both really run,
        # nvcc would be given two -arch options and CUDA_ARCH would be defined
        # twice - confirm.
        if( CUDA_ARCHITECTURE STREQUAL "2.1" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_21;-DCUDA_ARCH=21)
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_20;-code=sm_21;-DCUDA_ARCH=21)
        endif()    
        if( CUDA_ARCHITECTURE STREQUAL "2.2" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_22;-DCUDA_ARCH=22)
@@ -65,7 +64,7 @@ if( NOT WITH_CUDA STREQUAL "no" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_23;-DCUDA_ARCH=23)
        endif()    
        # CUDA_ARCHITECTURE "3.0": append architecture options and the
        # CUDA_ARCH=30 define to CUDA_NVCC_FLAGS.
        # NOTE(review): two set() calls append here (first -arch=sm_30, then
        # -arch=compute_30 with -code=sm_30). In this marker-less diff listing
        # they may be the old and new form of one line rather than two
        # successive appends - confirm.
        if( CUDA_ARCHITECTURE STREQUAL "3.0" )
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_30;-DCUDA_ARCH=30)
            set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=compute_30;-code=sm_30;-DCUDA_ARCH=30)
        endif()    
    
        ####
+12 −10
Original line number Diff line number Diff line
@@ -10,6 +10,8 @@ VERBOSE="VERBOSE=1"

# Name of the cmake executable invoked by the configure step.
CMAKE="cmake"
# Count the lines containing "processor" in /proc/cpuinfo to get a job count ...
CPUS=`grep -c processor /proc/cpuinfo`
# ... then immediately overwrite it, so every make invocation runs with -j1.
# NOTE(review): this makes the detection above dead code; presumably a
# temporary aid for reading nvcc output serially - confirm before keeping.
CPUS="1"


# Report the target and the job count in use ($TARGET is set outside this excerpt).
echo "Building $TARGET using $CPUS processors."

@@ -34,14 +36,14 @@ make -j${CPUS} ${VERBOSE}
make -j${CPUS} test
make -j${CPUS} install

# Release build: switch to the sibling ../Release directory, configure the
# parent directory as the source tree, then build, test and install.
cd ../Release
# Install under ${HOME}/local and forward the CUDA, cuSPARSE, PETSc and
# explicit-template-instantiation settings from shell variables that are set
# outside this excerpt.
${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \
            -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \
            -DWITH_CUDA=${WITH_CUDA} \
            -DWITH_CUSPARSE=${WITH_CUSPARSE} \
            -DPETSC_DIR=${PETSC_DIR} \
            -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION}
# Build with ${CPUS} parallel jobs; ${VERBOSE} is passed through to make.
make -j${CPUS} ${VERBOSE}
# Run the test target, then install.
make -j${CPUS} test
make -j${CPUS} install
#cd ../Release
#${CMAKE} .. -DCMAKE_INSTALL_PREFIX=${HOME}/local \
#            -DCUDA_ARCHITECTURE=${CUDA_ARCHITECTURE} \
#            -DWITH_CUDA=${WITH_CUDA} \
#            -DWITH_CUSPARSE=${WITH_CUSPARSE} \
#            -DPETSC_DIR=${PETSC_DIR} \
#            -DWITH_TEMPLATE_EXPLICIT_INSTANTIATION=${TEMPLATE_EXPLICIT_INSTANTIATION}
#make -j${CPUS} ${VERBOSE}
#make -j${CPUS} test
#make -j${CPUS} install
+3 −2
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ set( tnl_CUDA__SOURCES ${tnl_generators_CUDA__SOURCES}
                 
if( BUILD_CUDA )
   CUDA_ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES}
                                                  OPTIONS -arch sm_20 )
                                                  OPTIONS -arch sm_20 -shared --compiler-options '-fPIC','-shared' )
else( BUILD_CUDA )
   ADD_LIBRARY( tnl${debugExt}-${tnlVersion} SHARED 
                ${tnl_SOURCES} )
@@ -47,7 +47,8 @@ IF( BUILD_MPI )
   
   if( BUILD_CUDA )
      CUDA_ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED ${tnl_CUDA__SOURCES} 
                                                         OPTIONS -arch sm_20 )
                                                         OPTIONS -arch sm_20 -shared --compiler-options '-fPIC','-shared' )
#-arch sm_20 -shared --linker-options '-fPIC','-shared'                                                         
   else( BUILD_CUDA )
         ADD_LIBRARY( tnl-mpi${debugExt}-${tnlVersion} SHARED
                      ${tnl_SOURCES} )  
+1 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< long double, in
                                     const typename tnlParallelReductionAbsMax< long double, int > :: RealType* deviceInput2,
                                     typename tnlParallelReductionAbsMax< long double, int> :: ResultType& result );
                                     

template bool reductionOnCudaDevice< tnlParallelReductionAbsMax< char, long int > >
                                   ( const tnlParallelReductionAbsMax< char, long int >& operation,
                                     const typename tnlParallelReductionAbsMax< char, long int > :: IndexType size,
+2 −1
Original line number Diff line number Diff line
@@ -10,7 +10,8 @@ SET( tnlSpmvBenchmark_headers sparse-matrix-benchmark.h
     

IF( BUILD_CUDA )
    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu )
    CUDA_ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cu
                         OPTIONS -arch sm_20 -shared )
    SET_TARGET_PROPERTIES( tnl-sparse-matrix-benchmark${debugExt} PROPERTIES CUDA_COMPILE_FLAGS "${CXX_OPTIMIZE_FLAGS}" )        
ELSE()
    ADD_EXECUTABLE( tnl-sparse-matrix-benchmark${debugExt} sparse-matrix-benchmark.cpp )
Loading