Commit 3ccd032e authored by Tomáš Oberhuber
Browse files

C++ compiler log identification was improved.

RowLengths was renamed to CompressedRowsLengths.
OpenMP support was added to matrix-vector multiplication and to scalar
products.
CSR format can use Cusparse for SpMV.
parent 945ff662
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
Oberhuber Tomas <tomas.oberhuber@fjfi.cvut.cz>
Zabka Vitezslav <zabkavit@fjfi.cvut.cz>
Vladimir Klement
Tomáš Sobotík
Ondřej Székely
Jiří Kafka
Libor Bakajsa
Jakub Klinkovský
Vacata Jan
Heller Martin
Novotny Matej
+5 −10
Original line number Diff line number Diff line
@@ -41,13 +41,12 @@ if( WITH_CUDA STREQUAL "yes" )
        set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE OFF)
        set(BUILD_SHARED_LIBS ON)
        set(CUDA_SEPARABLE_COMPILATION ON)
        set(CUSPARSE_LIBRARY /usr/local/cuda/lib64/libcusparse.so) # TODO: fix this              
        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -DHAVE_CUDA )
        AddCompilerFlag( "-DHAVE_NOT_CXX11" ) # -U_GLIBCXX_ATOMIC_BUILTINS -U_GLIBCXX_USE_INT128 " )
        set( ALL_CUDA_ARCHS -gencode arch=compute_20,code=sm_20
                            -gencode arch=compute_30,code=sm_30
                            -gencode arch=compute_32,code=sm_32 
                            -gencode arch=compute_37,code=sm_37 
                            -gencode arch=compute_35,code=sm_35 
                            -gencode arch=compute_37,code=sm_37 
                            -gencode arch=compute_50,code=sm_50 
                            -gencode arch=compute_52,code=sm_52 )
@@ -87,7 +86,6 @@ if( WITH_CUDA STREQUAL "yes" )
        set( CUDA_ADD_LIBRARY_OPTIONS ${CUDA_ARCH} -shared )
        set( CUDA_LINKER_OPTIONS "-arch sm_20 -shared" )


        ####
        # Check for cuBLAS
        #
@@ -127,6 +125,7 @@ if( WITH_CUDA STREQUAL "yes" )
               message( "CUSPARSE found. -- ${CUSPARSE_INCLUDE_DIR}" )
               set( HAVE_CUSPARSE "#define HAVE_CUSPARSE" )
               cuda_include_directories( ${CUSPARSE_INCLUDE_DIR} )
               set( CUSPARSE_LIBRARY "${CUDA_cusparse_LIBRARY}" )
           endif()            
        endif( NOT WITH_CUSPARSE STREQUAL "no" )
   
@@ -143,9 +142,8 @@ endif( WITH_CUDA STREQUAL "yes" )
#
find_package( OpenMP ) 
if( OPENMP_FOUND )
   #AddCompilerFlag( "-DHAVE_OPENMP -fopenmp" )
   message( "Compiler supports OpenMP." )
   set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_OPENMP -fopenmp")
# TODO: finish this
endif()

####
@@ -184,9 +182,6 @@ else()
    set( HAVE_SYS_RESOURCE_H "#define HAVE_SYS_RESOURCE_H" )
endif()

  


####
# Check for cppunit
#
+4 −2
Original line number Diff line number Diff line
@@ -5,13 +5,15 @@ set( tnl_heat_equation_SOURCES
IF( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE(tnl-heat-equation${debugExt} tnl-heat-equation.cu)
   CUDA_ADD_EXECUTABLE(tnl-heat-equation-eoc-test${debugExt} tnl-heat-equation-eoc.cu)
   target_link_libraries (tnl-heat-equation${debugExt} tnl${debugExt}-${tnlVersion}  ${CUSPARSE_LIBRARY} )
   target_link_libraries (tnl-heat-equation-eoc-test${debugExt} tnl${debugExt}-${tnlVersion}  ${CUSPARSE_LIBRARY} )
ELSE(  BUILD_CUDA )               
   ADD_EXECUTABLE(tnl-heat-equation${debugExt} tnl-heat-equation.cpp)     
   ADD_EXECUTABLE(tnl-heat-equation-eoc-test${debugExt} tnl-heat-equation-eoc.cpp)   
ENDIF( BUILD_CUDA )

   target_link_libraries (tnl-heat-equation${debugExt} tnl${debugExt}-${tnlVersion} )
   target_link_libraries (tnl-heat-equation-eoc-test${debugExt} tnl${debugExt}-${tnlVersion} )
ENDIF( BUILD_CUDA )


INSTALL( TARGETS tnl-heat-equation${debugExt}
                 tnl-heat-equation-eoc-test${debugExt}
+1 −2
Original line number Diff line number Diff line
@@ -6,7 +6,6 @@ BUILD_RELEASE="yes"
OPTIONS=""

CMAKE_TEST=`which cmake`    
echo ${CMAKE_TEST}
if test x${CMAKE_TEST} = "x";
then
    echo "Cmake is not installed on your system. Please install it by:"
+4 −1
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@ using namespace std;
 */
const int minGPUReductionDataSize = 128;//65536; //16384;//1024;//256;

static tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize );
//static tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize );

#ifdef HAVE_CUDA

@@ -74,6 +74,9 @@ typename Operation::IndexType reduceOnCudaDevice( const Operation& operation,
   dim3 blockSize( 256 ), gridSize( 0 );   
   gridSize.x = Min( tnlCuda::getNumberOfBlocks( size, blockSize.x ), desGridSize );
  
   // create reference to the reduction buffer singleton and set default size
   tnlCudaReductionBuffer & cudaReductionBuffer = tnlCudaReductionBuffer::getInstance( 8 * minGPUReductionDataSize );
   
   //tnlCudaReductionBuffer cudaReductionBuffer( 8 * minGPUReductionDataSize );
   if( ! cudaReductionBuffer.setSize( gridSize.x * sizeof( ResultType ) ) )
      return false;
Loading