Commit 9e8685af authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Merge branch 'TO/matrices' into 'develop'

To/matrices

See merge request !80
parents 914aff45 5a1c8790
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -12,3 +12,6 @@
/.settings
/.project
/.pydevproject

# VSCode
/.vscode
+27 −15
Original line number Diff line number Diff line
@@ -76,7 +76,7 @@ stages:
        - cmake ../..
                -G Ninja
                -DCMAKE_BUILD_TYPE=${BUILD_TYPE}
                -DCMAKE_INSTALL_PREFIX=$(pwd)/${BUILD_TYPE}_install_prefix
                -DCMAKE_INSTALL_PREFIX="$(pwd)/${BUILD_TYPE}_install_prefix"
                -DWITH_OPENMP=${WITH_OPENMP}
                -DWITH_MPI=${WITH_MPI}
                -DWITH_CUDA=${WITH_CUDA}
@@ -102,8 +102,6 @@ stages:
    only:
        changes:
            - src/**/*.{h,hpp,cpp,cu}
            - Documentation/Examples/**/*.{h,hpp,cpp,cu}
            - Documentation/Tutorials/**/*.{h,hpp,cpp,cu}
            - "**/CMakeLists.txt"
            - .gitlab-ci.yml
    interruptible: true
@@ -117,11 +115,12 @@ dummy build job:
        - merge_requests
    except:
        changes:
            # .build_template
            - src/**/*.{h,hpp,cpp,cu}
            - Documentation/Examples/**/*.{h,hpp,cpp,cu}
            - Documentation/Tutorials/**/*.{h,hpp,cpp,cu}
            - "**/CMakeLists.txt"
            - .gitlab-ci.yml
            # build documentation
            - Documentation/**/*

# Cuda builds are specified first because they take more time than host-only builds,
# which can be allocated on hosts without GPUs.
@@ -189,12 +188,6 @@ cuda_examples_Debug:
        WITH_CUDA: "yes"
        BUILD_TYPE: Debug
        WITH_EXAMPLES: "yes"
        # build output snippets for documentation
        WITH_DOC: "yes"
    # store output snippets for documentation
    artifacts:
        paths:
            - Documentation/output_snippets/

cuda_examples_Release:
    extends: .build_template
@@ -468,6 +461,28 @@ clang_mpi_benchmarks_tools_python_Release:



documentation examples:
    extends: .build_template
    stage: build:cuda
    tags:
        - docker
        - nvidia
    variables:
        <<: *default_cmake_flags
        WITH_CUDA: "yes"
        BUILD_TYPE: Debug
        # build output snippets for documentation
        WITH_DOC: "yes"
    only:
        changes:
            - Documentation/**/*
            - src/TNL/**/*.{h,hpp}
            - .gitlab-ci.yml
    # store output snippets for documentation
    artifacts:
        paths:
            - Documentation/output_snippets/

build documentation:
    stage: build:doc
    only:
@@ -477,16 +492,13 @@ build documentation:
            - .gitlab-ci.yml
    # use "needs" instead of "dependencies" to allow out-of-order start of this job
    needs:
        # the job which builds Documentation/output_snippets/
        - job: cuda_examples_Debug
        - job: documentation examples
          artifacts: true
    script:
        - ./Documentation/build
    artifacts:
        paths:
            - ./Documentation/html/
#    tags:
#        - doxygen

deploy documentation:
    stage: deploy
+3 −3
Original line number Diff line number Diff line
@@ -16,8 +16,8 @@ void setElements()
   std::cout << "Matrix set from the host:" << std::endl;
   std::cout << *matrix << std::endl;

   auto f = [=] __cuda_callable__ ( int i ) mutable {
      matrix->setElement( i, i, -i );
   auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
      matrix->addElement( i, j, 5.0 );
   };

   /***
@@ -26,7 +26,7 @@ void setElements()
    * DenseMatrixView::getRow example for details.
    */
   TNL::Pointers::synchronizeSmartPointersOnDevice< Device >();
   TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f );
   TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, 5, 5, f );

   std::cout << "Matrix set from its native device:" << std::endl;
   std::cout << *matrix << std::endl;
+3 −3
Original line number Diff line number Diff line
@@ -14,10 +14,10 @@ void setElements()
   std::cout << "Matrix set from the host:" << std::endl;
   std::cout << matrix << std::endl;

   auto f = [=] __cuda_callable__ ( int i ) mutable {
      matrixView.setElement( i, i, -i );
   auto f = [=] __cuda_callable__ ( int i, int j ) mutable {
      matrixView.addElement( i, j, 5.0 );
   };
   TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f );
   TNL::Algorithms::ParallelFor2D< Device >::exec( 0, 0, 5, 5, f );

   std::cout << "Matrix set from its native device:" << std::endl;
   std::cout << matrix << std::endl;
+24 −2
Original line number Diff line number Diff line
@@ -13,8 +13,18 @@ ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_getNonzeroElementsCount >
                     ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_getNonzeroElementsCount.out
                    OUTPUT LambdaMatrixExample_getNonzeroElementsCount.out )


IF( BUILD_CUDA )
   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_Laplace_cuda LambdaMatrixExample_Laplace.cu )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_cuda >
                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace.out
                     OUTPUT LambdaMatrixExample_Laplace.out )

   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_Laplace_2_cuda LambdaMatrixExample_Laplace_2.cu )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_2_cuda >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace_2.out
                       OUTPUT LambdaMatrixExample_Laplace_2.out )
                  

   CUDA_ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction_cuda LambdaMatrixExample_rowsReduction.cu )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction_cuda >
                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
@@ -36,6 +46,16 @@ IF( BUILD_CUDA )
                       OUTPUT LambdaMatrixExample_forAllRows.out )

ELSE()
   ADD_EXECUTABLE( LambdaMatrixExample_Laplace LambdaMatrixExample_Laplace.cpp )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace >
                       ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace.out
                       OUTPUT LambdaMatrixExample_Laplace.out )

   ADD_EXECUTABLE( LambdaMatrixExample_Laplace_2 LambdaMatrixExample_Laplace_2.cpp )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_Laplace_2 >
                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_Laplace_2.out
                        OUTPUT LambdaMatrixExample_Laplace_2.out )

   ADD_EXECUTABLE( LambdaMatrixExample_rowsReduction LambdaMatrixExample_rowsReduction.cpp )
   ADD_CUSTOM_COMMAND( COMMAND LambdaMatrixExample_rowsReduction >
                        ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/LambdaMatrixExample_rowsReduction.out
@@ -59,6 +79,8 @@ ENDIF()

ADD_CUSTOM_TARGET( RunLambdaMatricesExamples ALL DEPENDS
   LambdaMatrixExample_Constructor.out
   LambdaMatrixExample_Laplace.out
   LambdaMatrixExample_Laplace_2.out
   LambdaMatrixExample_getCompressedRowLengths.out
   LambdaMatrixExample_getNonzeroElementsCount.out
   LambdaMatrixExample_rowsReduction.out
Loading