diff --git a/cmake/UseCodeCoverage.cmake b/cmake/UseCodeCoverage.cmake
index ce2053431474a1189bebd375a68e0edd14845dfd..d9ba5000cbb17b920fe5b4b31a07c93aafa438a8 100644
--- a/cmake/UseCodeCoverage.cmake
+++ b/cmake/UseCodeCoverage.cmake
@@ -27,6 +27,6 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    add_compile_options( -fprofile-instr-generate -fcoverage-mapping )
    add_link_options( -fprofile-instr-generate -fcoverage-mapping )
    if( ${WITH_CUDA} )
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -fprofile-instr-generate ; -Xcompiler -fcoverage-mapping )
+      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ; -Xcompiler -fprofile-instr-generate ; -Xcompiler -fcoverage-mapping ; -g )
    endif()
 endif()
diff --git a/scripts/code_coverage/coverage.py b/scripts/code_coverage/coverage.py
index abc07785b9296bcc42cb81bb58842cd21f511c8a..68c20ba512c051e6f264795c5268225451b1e2d9 100755
--- a/scripts/code_coverage/coverage.py
+++ b/scripts/code_coverage/coverage.py
@@ -231,6 +231,8 @@ def execute_command(target, command, output_file_path):
     # For other cases, "%4m" is chosen as it creates some level of parallelism,
     # but it's not too big to consume too much computing resource or disk space.
     profile_pattern_string = "%1m"
+    if "mpi" in command:
+        profile_pattern_string = "%4m"
     expected_profraw_file_name = os.extsep.join(
         [target, profile_pattern_string, PROFRAW_FILE_EXTENSION])
     expected_profraw_file_path = os.path.join(
@@ -363,18 +365,29 @@ def verify_paths_and_return_absolutes(paths):
     return absolute_paths
 
 
-def get_binary_paths_from_targets(targets, build_dir):
-    """Return binary paths from target names."""
+def parse_targets_arguments(targets_args, build_dir):
+    """Parse "target[ :: command]" specs into (targets, commands, binary_paths)."""
+    targets = []
+    commands = []
     binary_paths = []
-    for target in targets:
-        binary_path = os.path.join(build_dir, "bin", target)
-
-        if os.path.exists(binary_path):
-            binary_paths.append(binary_path)
-        else:
+    for target in targets_args:
+        # Split on the first "::" only, so the command itself may contain "::".
+        target, separator, command = target.partition("::")
+        target = target.strip()
+        # Without an explicit command, the target binary itself is executed.
+        command = command.strip() if separator else target
+
+        binary_path = os.path.join(build_dir, "bin", target)
+        if not os.path.exists(binary_path):
             logging.warning("Target binary '{}' not found in build directory, skipping.".format(os.path.basename(binary_path)))
+            continue
+
+        targets.append(target)
+        command = command.replace(target, binary_path)
+        commands.append(command)
+        binary_paths.append(binary_path)
 
-    return binary_paths
+    return targets, commands, binary_paths
 
 
 def setup_output_dir():
@@ -450,8 +463,8 @@ if __name__ == "__main__":
 
     setup_output_dir()
 
-    binary_paths = get_binary_paths_from_targets(args.targets, args.build_dir)
-    profdata_file_path = create_coverage_profdata_for_targets(args.targets, binary_paths)
+    targets, commands, binary_paths = parse_targets_arguments(args.targets, args.build_dir)
+    profdata_file_path = create_coverage_profdata_for_targets(targets, commands)
 
     logging.info("Generating code coverage report in html...")
     per_file_summary_data = generate_per_file_coverage_summary(
diff --git a/scripts/make_coverage_report b/scripts/make_coverage_report
index 8a6163d0fe2ebe9748a8037a1ca3c58db42ad82b..e0c9f1820b900decc218fbdfc555b776e7452fda 100755
--- a/scripts/make_coverage_report
+++ b/scripts/make_coverage_report
@@ -12,20 +12,21 @@ pushd Debug
 popd
 
 tests=(
-#   AllocatorsTest-dbg  # FIXME: CUDA
+   AllocatorsTest-dbg
    AssertTest-dbg
-#   AssertCudaTest-dbg  # FIXME: CUDA
+   AssertCudaTest-dbg
    FileNameTest-dbg
-#   FileTest-dbg  # FIXME: CUDA
+   FileTest-dbg
    ObjectTest-dbg
-#   ParallelForTest-dbg  # FIXME: CUDA
+#   ParallelForTest-dbg  # FIXME: too slow
    StringTest-dbg
    TimerTest-dbg
+   TypeInfoTest-dbg
 
    # Containers
-#   ArrayOperationsTest-dbg  # FIXME: CUDA
+   ArrayOperationsTest-dbg
    ArrayTest-dbg
-#   ArrayTestCuda-dbg  # FIXME: CUDA
+   ArrayTestCuda-dbg
    ArrayViewTest-dbg
    VectorTest-dbg
    VectorPrefixSumTest-dbg
@@ -36,29 +37,39 @@ tests=(
    StaticArrayTest-dbg
    StaticVectorTest-dbg
    StaticVectorOperationsTest-dbg
+   NDArrayTest-dbg
+   NDSubarrayTest-dbg
+   SlicedNDArrayTest-dbg
+   StaticNDArrayTest-dbg
+   StaticNDArrayCudaTest-dbg
+   MultireductionTest-dbg
    ListTest-dbg
-#   MultireductionTest-dbg  # FIXME: CUDA
    MultimapTest-dbg
    StaticMultimapTest-dbg
-   # TODO: run these with mpirun (coverage.py does not support that)
-   DistributedArrayTest-dbg
-   DistributedVectorBinaryOperationsTest-dbg
-   DistributedVectorUnaryOperationsTest-dbg
-   DistributedVectorVerticalOperationsTest-dbg
+   "DistributedArrayTest-dbg                     :: mpirun -np 4 DistributedArrayTest-dbg"
+   "DistributedVectorTest-dbg                    :: mpirun -np 4 DistributedVectorTest-dbg"
+   "DistributedVectorBinaryOperationsTest-dbg    :: mpirun -np 4 DistributedVectorBinaryOperationsTest-dbg"
+   "DistributedVectorUnaryOperationsTest-dbg     :: mpirun -np 4 DistributedVectorUnaryOperationsTest-dbg"
+   "DistributedVectorVerticalOperationsTest-dbg  :: mpirun -np 4 DistributedVectorVerticalOperationsTest-dbg"
+   "DistributedNDArray_1D_test-dbg               :: mpirun -np 4 DistributedNDArray_1D_test-dbg"
+   "DistributedNDArray_semi1D_test-dbg           :: mpirun -np 4 DistributedNDArray_semi1D_test-dbg"
+   "DistributedNDArrayOverlaps_1D_test-dbg       :: mpirun -np 4 DistributedNDArrayOverlaps_1D_test-dbg"
+   "DistributedNDArrayOverlaps_semi1D_test-dbg   :: mpirun -np 4 DistributedNDArrayOverlaps_semi1D_test-dbg"
 
    # Functions
-#   BoundaryMeshFunctionTest-dbg  # FIXME: CUDA
-#   MeshFunctionTest-dbg  # FIXME: CUDA
+   BoundaryMeshFunctionTest-dbg
+   MeshFunctionTest-dbg
 
    # Matrices
-#   DenseMatrixTest-dbg  # FIXME: CUDA
-#   SparseMatrixCopyTest-dbg  # FIXME: CUDA
-#   SparseMatrixTest_AdEllpack-dbg  # FIXME: CUDA
-#   SparseMatrixTest_BiEllpack-dbg  # FIXME: CUDA
-#   SparseMatrixTest_ChunkedEllpack-dbg  # FIXME: CUDA
-#   SparseMatrixTest_CSR-dbg  # FIXME: CUDA
-#   SparseMatrixTest_Ellpack-dbg  # FIXME: CUDA
-#   SparseMatrixTest_SlicedEllpack-dbg  # FIXME: CUDA
+   DenseMatrixTest-dbg
+   SparseMatrixCopyTest-dbg
+   SparseMatrixTest_AdEllpack-dbg
+   SparseMatrixTest_BiEllpack-dbg
+   SparseMatrixTest_ChunkedEllpack-dbg
+   SparseMatrixTest_CSR-dbg
+   SparseMatrixTest_Ellpack-dbg
+   SparseMatrixTest_SlicedEllpack-dbg
+   "DistributedMatrixTest-dbg   :: mpirun -np 4 DistributedMatrixTest-dbg"
 
    # Meshes
    BoundaryTagsTest-dbg
@@ -69,4 +80,4 @@ tests=(
 )
 
 # run the programs and create coverage reports
-./scripts/code_coverage/coverage.py -b Debug -o coverage_report/ ${tests[@]}
+./scripts/code_coverage/coverage.py -b Debug -o coverage_report/ --ignore-filename-regex="^\/tmp\/tmpxft_.*" "${tests[@]}"