From 45ad3fa79451c194e26fc492a695cb7ed0efd7a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Fri, 11 Jun 2021 16:11:16 +0200 Subject: [PATCH] Implementing Light CSR kernel for segments. --- src/TNL/Algorithms/Segments/CSR.h | 6 +- src/TNL/Algorithms/Segments/CSRView.h | 8 +- .../Segments/Kernels/CSRLightKernel.h | 64 +++ .../Segments/Kernels/CSRLightKernel.hpp | 422 ++++++++++++++++++ src/UnitTests/Matrices/CMakeLists.txt | 2 + .../Matrices/SparseMatrixTest_CSRLight.cpp | 11 + .../Matrices/SparseMatrixTest_CSRLight.cu | 1 + .../Matrices/SparseMatrixTest_CSRLight.h | 46 ++ ...SparseMatrixVectorProductTest_CSRLight.cpp | 11 + .../SparseMatrixVectorProductTest_CSRLight.cu | 1 + .../SparseMatrixVectorProductTest_CSRLight.h | 46 ++ 11 files changed, 616 insertions(+), 2 deletions(-) create mode 100644 src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h create mode 100644 src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cpp create mode 120000 src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_CSRLight.h create mode 100644 src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cpp create mode 120000 src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.h diff --git a/src/TNL/Algorithms/Segments/CSR.h b/src/TNL/Algorithms/Segments/CSR.h index eebd186a6f..8fba00b2a5 100644 --- a/src/TNL/Algorithms/Segments/CSR.h +++ b/src/TNL/Algorithms/Segments/CSR.h @@ -546,6 +546,11 @@ template< typename Device, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > using CSRHybrid = CSR< Device, Index, CSRHybridKernel< Index, Device >, IndexAllocator >; +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +using CSRLight = CSR< Device, Index, CSRLightKernel< Index, Device >, IndexAllocator >; + template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > @@ -556,7 +561,6 @@ template< typename Device, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > using CSRDefault = CSRScalar< Device, Index, IndexAllocator >; - } // namespace Segments } // namespace Algorithms } // namespace TNL diff --git a/src/TNL/Algorithms/Segments/CSRView.h b/src/TNL/Algorithms/Segments/CSRView.h index 8770f8ca86..884ed71cf3 100644 --- a/src/TNL/Algorithms/Segments/CSRView.h +++ b/src/TNL/Algorithms/Segments/CSRView.h @@ -17,6 +17,7 @@ #include <TNL/Algorithms/Segments/Kernels/CSRScalarKernel.h> #include <TNL/Algorithms/Segments/Kernels/CSRVectorKernel.h> #include <TNL/Algorithms/Segments/Kernels/CSRHybridKernel.h> +#include <TNL/Algorithms/Segments/Kernels/CSRLightKernel.h> #include <TNL/Algorithms/Segments/Kernels/CSRAdaptiveKernel.h> #include <TNL/Algorithms/Segments/SegmentsPrinting.h> @@ -163,9 +164,14 @@ template< typename Device, typename Index > using CSRViewVector = CSRView< Device, Index, CSRVectorKernel< Index, Device > >; +template< typename Device, + typename Index, + int ThreadsInBlock = 256 > +using CSRViewHybrid = CSRView< Device, Index, CSRHybridKernel< Index, Device, ThreadsInBlock > >; + template< typename Device, typename Index > -using CSRViewHybrid = CSRView< Device, Index, CSRHybridKernel< Index, Device > >; +using CSRViewLight = CSRView< Device, Index, CSRLightKernel< Index, Device > >; template< typename Device, typename Index > diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h new file mode 100644 index 0000000000..a3aa961b40 --- /dev/null +++ b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.h @@ -0,0 +1,64 @@ +/*************************************************************************** + CSRLightKernel.h - description + ------------------- + begin : Jun 9, 2021 -> Joe Biden inauguration + copyright : (C) 2021 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/Segments/details/LambdaAdapter.h> + +namespace TNL { + namespace Algorithms { + namespace Segments { + +template< typename Index, + typename Device > +struct CSRLightKernel +{ + using IndexType = Index; + using DeviceType = Device; + using ViewType = CSRLightKernel< Index, Device >; + using ConstViewType = CSRLightKernel< Index, Device >; + + template< typename Offsets > + void init( const Offsets& offsets ); + + void reset(); + + ViewType getView(); + + ConstViewType getConstView() const; + + static TNL::String getKernelType(); + + template< typename OffsetsView, + typename Fetch, + typename Reduction, + typename ResultKeeper, + typename Real > + void reduceSegments( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduction& reduction, + ResultKeeper& keeper, + const Real& zero ) const; + + protected: + int threadsPerSegment = 0; +}; + + } // namespace Segments + } // namespace Algorithms +} // namespace TNL + +#include <TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp> diff --git a/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp new file mode 100644 index 0000000000..e31ff3f43e --- /dev/null +++ b/src/TNL/Algorithms/Segments/Kernels/CSRLightKernel.hpp @@ -0,0 +1,422 @@ +/*************************************************************************** + CSRLightKernel.hpp - description + ------------------- + begin : Jun 9, 2021 -> Joe Biden inauguration + copyright : (C) 2021 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/Segments/details/LambdaAdapter.h> +#include <TNL/Algorithms/Segments/Kernels/CSRLightKernel.h> + +namespace TNL { + namespace Algorithms { + namespace Segments { + +#ifdef HAVE_CUDA +template< typename Real, + typename Index, + typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep > +__global__ +void SpMVCSRLightWithoutAtomic2( OffsetsView offsets, + const Index first, + const Index last, + Fetch fetch, + Reduce reduce, + Keep keep, + const Real zero, + const Index gridID) +{ + const Index segmentIdx = + first + ( ( gridID * TNL::Cuda::getMaxGridXSize() ) + (blockIdx.x * blockDim.x) + threadIdx.x ) / 2; + if( segmentIdx >= last ) + return; + + const Index inGroupID = threadIdx.x & 1; // & is cheaper than % + const Index maxID = offsets[ segmentIdx + 1]; + + Real result = zero; + for( Index i = offsets[segmentIdx] + inGroupID; i < maxID; i += 2) + result = reduce( result, fetch( i, true ) ); + + /* Parallel reduction */ + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 1 ) ); + + /* Write result */ + if( inGroupID == 0 ) + keep( segmentIdx, result ); +} + +template< typename Real, + typename Index, + typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep > +__global__ +void SpMVCSRLightWithoutAtomic4( OffsetsView offsets, + const Index first, + const Index last, + Fetch fetch, + Reduce reduce, + Keep keep, + const Real zero, + const Index gridID ) +{ + const Index segmentIdx = + first + ((gridID * TNL::Cuda::getMaxGridXSize() ) + (blockIdx.x * blockDim.x) + threadIdx.x) / 4; + if (segmentIdx >= last) + return; + + const Index inGroupID = threadIdx.x & 3; // & is cheaper than % + const Index maxID = offsets[segmentIdx + 1]; + + Real result = zero; + for (Index i = offsets[segmentIdx] + inGroupID; i < maxID; i += 4) + result = reduce( result, fetch( i, true ) ); + + /* Parallel reduction */ + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 2 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 1 ) ); + + /* Write result */ + if( inGroupID == 0 ) + keep( segmentIdx, result ); + +} + +template< typename Real, + typename Index, + typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep > +__global__ +void SpMVCSRLightWithoutAtomic8( OffsetsView offsets, + const Index first, + const Index last, + Fetch fetch, + Reduce reduce, + Keep keep, + const Real zero, + const Index gridID) +{ + const Index segmentIdx = + first + ((gridID * TNL::Cuda::getMaxGridXSize() ) + (blockIdx.x * blockDim.x) + threadIdx.x) / 8; + if (segmentIdx >= last) + return; + + Index i; + const Index inGroupID = threadIdx.x & 7; // & is cheaper than % + const Index maxID = offsets[segmentIdx + 1]; + + Real result = zero; + for (i = offsets[segmentIdx] + inGroupID; i < maxID; i += 8) + result = reduce( result, fetch( i, true ) ); + + /* Parallel reduction */ + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 4 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 2 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 1 ) ); + + /* Write result */ + if( inGroupID == 0 ) + keep( segmentIdx, result ); +} + +template< typename Real, + typename Index, + typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep > +__global__ +void SpMVCSRLightWithoutAtomic16( OffsetsView offsets, + const Index first, + const Index last, + Fetch fetch, + Reduce reduce, + Keep keep, + const Real zero, + const Index gridID ) +{ + const Index segmentIdx = + first + ((gridID * TNL::Cuda::getMaxGridXSize() ) + (blockIdx.x * blockDim.x) + threadIdx.x ) / 16; + if( segmentIdx >= last ) + return; + + + Index i; + const Index inGroupID = threadIdx.x & 15; // & is cheaper than % + const Index maxID = offsets[segmentIdx + 1]; + + Real result = zero; + for( i = offsets[segmentIdx] + inGroupID; i < maxID; i += 16 ) + result = reduce( result, fetch( i, true ) ); + + /* Parallel reduction */ + result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result, 8 ) ); + result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result, 4 ) ); + result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result, 2 ) ); + result = reduce( result, __shfl_down_sync( 0xFFFFFFFF, result, 1 ) ); + + /* Write result */ + if( inGroupID == 0 ) + keep( segmentIdx, result ); +} + +template< typename Real, + typename Index, + typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep > +__global__ +void SpMVCSRVector( OffsetsView offsets, + const Index first, + const Index last, + Fetch fetch, + Reduce reduce, + Keep keep, + const Real zero, + const Index gridID ) +{ + const int warpSize = 32; + const Index warpID = first + ((gridID * TNL::Cuda::getMaxGridXSize() ) + (blockIdx.x * blockDim.x) + threadIdx.x) / warpSize; + if (warpID >= last) + return; + + Real result = zero; + const Index laneID = threadIdx.x & 31; // & is cheaper than % + Index endID = offsets[warpID + 1]; + + /* Calculate result */ + for (Index i = offsets[warpID] + laneID; i < endID; i += warpSize) + result = reduce( result, fetch( i, true ) ); + + /* Reduction */ + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 16 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 8 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 4 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 2 ) ); + result = reduce( result, __shfl_down_sync(0xFFFFFFFF, result, 1 ) ); + /* Write result */ + if( laneID == 0 ) + keep( warpID, result ); +} +#endif +template< typename Index, + typename Device, + typename Fetch, + typename Reduce, + typename Keep, + bool DispatchScalarCSR = + details::CheckFetchLambda< Index, Fetch >::hasAllParameters() || + std::is_same< Device, Devices::Host >::value > +struct CSRLightKernelreduceSegmentsDispatcher; + +template< typename Index, + typename Device, + typename Fetch, + typename Reduction, + typename ResultKeeper > +struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduction, ResultKeeper, true > +{ + + template< typename Offsets, + typename Real > + static void reduce( const Offsets& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduction& reduce, + ResultKeeper& keep, + const Real& zero, + const Index threadsPerSegment ) + { + TNL::Algorithms::Segments::CSRScalarKernel< Index, Device >:: + reduceSegments( offsets, first, last, fetch, reduce, keep, zero ); + } +}; + +template< typename Index, + typename Device, + typename Fetch, + typename Reduce, + typename Keep > +struct CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Keep, false > +{ + template< typename OffsetsView, + typename Real > + static void reduce( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduce& reduce, + Keep& keep, + const Real& zero, + const Index threadsPerSegment ) + { +#ifdef HAVE_CUDA + const int threads = 128; + Index blocks, groupSize; + //if (KernelType == CSRLightWithoutAtomic) + int neededThreads = threadsPerSegment * ( last - first ); + //else + // neededThreads = rows * (threadsPerSegment > 32 ? 32 : threadsPerSegment); + + /* Execute kernels on device */ + for (Index grid = 0; neededThreads != 0; ++grid) + { + if( TNL::Cuda::getMaxGridXSize() * threads >= neededThreads) + { + blocks = roundUpDivision(neededThreads, threads); + neededThreads = 0; + } + else + { + blocks = TNL::Cuda::getMaxGridXSize(); + neededThreads -= TNL::Cuda::getMaxGridXSize() * threads; + } + + if (threadsPerSegment == 2) + SpMVCSRLightWithoutAtomic2<Real, Index, OffsetsView, Fetch, Reduce, Keep ><<<blocks, threads>>>( + offsets, first, last, fetch, reduce, keep, zero, grid ); + else if (threadsPerSegment == 4) + SpMVCSRLightWithoutAtomic4<Real, Index, OffsetsView, Fetch, Reduce, Keep ><<<blocks, threads>>>( + offsets, first, last, fetch, reduce, keep, zero, grid ); + else if (threadsPerSegment == 8) + SpMVCSRLightWithoutAtomic8<Real, Index, OffsetsView, Fetch, Reduce, Keep ><<<blocks, threads>>>( + offsets, first, last, fetch, reduce, keep, zero, grid ); + else if (threadsPerSegment == 16) + SpMVCSRLightWithoutAtomic16<Real, Index, OffsetsView, Fetch, Reduce, Keep ><<<blocks, threads>>>( + offsets, first, last, fetch, reduce, keep, zero, grid ); + else // if (threadsPerSegment == 32) + { // CSR SpMV Light with threadsPerSegment = 32 is CSR Vector + SpMVCSRVector<Real, Index, OffsetsView, Fetch, Reduce, Keep, warpSize ><<<blocks, threads>>>( + offsets, first, last, fetch, reduce, keep, zero, grid ); + } + /*else + { // Execute CSR MultiVector + SpMVCSRMultiVector<Real, Index, warpSize><<<blocks, threads>>>( + inVector, outVector, matrix.getoffsets().getData(), + matrix.getColumnIndexes().getData(), matrix.getValues().getData(), + rows, threadsPerSegment / 32, grid + ); + }*/ + + } +#endif + + } +}; + + +template< typename Index, + typename Device > + template< typename Offsets > +void +CSRLightKernel< Index, Device >:: +init( const Offsets& offsets ) +{ + //const Index elementsInSegment = std::ceil( ( double ) offsets.getElement( segmentsCount ) / ( double ) segmentsCount ); + //this->threadsPerSegment = TNL::min( std::pow( 2, std::ceil( std::log2( elementsInSegment ) ) ) ); //TNL::Cuda::getWarpSize() ); + + const Index segmentsCount = offsets.getSize() - 1; + //const Index threads = 128; // !!!!!!!!!!!!!!!!!!!!!! block size + size_t neededThreads = segmentsCount * 32;//warpSize; + Index blocks, threadsPerSegment; + + const Index elementsInSegment = roundUpDivision( offsets.getElement( segmentsCount ), segmentsCount ); // non zeroes per row + if( elementsInSegment <= 2 ) + threadsPerSegment = 2; + else if( elementsInSegment <= 4 ) + threadsPerSegment = 4; + else if( elementsInSegment <= 8 ) + threadsPerSegment = 8; + else if( elementsInSegment <= 16 ) + threadsPerSegment = 16; + else //if (nnz <= 2 * matrix.MAX_ELEMENTS_PER_WARP) + threadsPerSegment = 32; // CSR Vector + //else + // threadsPerSegment = roundUpDivision(nnz, matrix.MAX_ELEMENTS_PER_WARP) * 32; // CSR MultiVector + + TNL_ASSERT_GE( threadsPerSegment, 0, "" ); + TNL_ASSERT_LE( threadsPerSegment, 33, "" ); + +} + +template< typename Index, + typename Device > +void +CSRLightKernel< Index, Device >:: +reset() +{ + this->threadsPerSegment = 0; +} + +template< typename Index, + typename Device > +auto +CSRLightKernel< Index, Device >:: +getView() -> ViewType +{ + return *this; +} + +template< typename Index, + typename Device > +TNL::String +CSRLightKernel< Index, Device >:: +getKernelType() +{ + return "Light"; +} + +template< typename Index, + typename Device > +auto +CSRLightKernel< Index, Device >:: +getConstView() const -> ConstViewType +{ + return *this; +}; + + +template< typename Index, + typename Device > + template< typename OffsetsView, + typename Fetch, + typename Reduce, + typename Keep, + typename Real > +void +CSRLightKernel< Index, Device >:: +reduceSegments( const OffsetsView& offsets, + Index first, + Index last, + Fetch& fetch, + const Reduce& reduce, + Keep& keep, + const Real& zero ) const +{ + TNL_ASSERT_GE( this->threadsPerSegment, 0, "" ); + TNL_ASSERT_LE( this->threadsPerSegment, 33, "" ); + CSRLightKernelreduceSegmentsDispatcher< Index, Device, Fetch, Reduce, Keep >::reduce( + offsets, first, last, fetch, reduce, keep, zero, this->threadsPerSegment ); +} + + } // namespace Segments + } // namespace Algorithms +} // namespace TNL diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index fa8876993c..2fe0f39ee7 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -8,6 +8,7 @@ set( COMMON_TESTS SparseMatrixTest_CSRScalar SparseMatrixTest_CSRVector SparseMatrixTest_CSRHybrid + SparseMatrixTest_CSRLight SparseMatrixTest_CSRAdaptive SparseMatrixTest_Ellpack SparseMatrixTest_SlicedEllpack @@ -16,6 +17,7 @@ set( COMMON_TESTS SparseMatrixVectorProductTest_CSRScalar SparseMatrixVectorProductTest_CSRVector SparseMatrixVectorProductTest_CSRHybrid + SparseMatrixVectorProductTest_CSRLight SparseMatrixVectorProductTest_CSRAdaptive SparseMatrixVectorProductTest_Ellpack SparseMatrixVectorProductTest_SlicedEllpack diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cpp new file mode 100644 index 0000000000..d6a3a41cd3 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_CSRLight.cpp - description + ------------------- + begin : Jun 9, 2021 + copyright : (C) 2021 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixTest_CSRLight.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cu new file mode 120000 index 0000000000..e40135b9e9 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.cu @@ -0,0 +1 @@ +SparseMatrixTest_CSRLight.cpp \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.h b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.h new file mode 100644 index 0000000000..ddd956a52c --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSRLight.h @@ -0,0 +1,46 @@ +/*************************************************************************** + SparseMatrixTest_CSRLight.h - description + ------------------- + begin : Jun 9, 2021 + copyright : (C) 2021 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <iostream> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrix.h> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRLight_segments"; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight > +#endif +>; + +#endif + +#include "SparseMatrixTest.h" +#include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cpp b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cpp new file mode 100644 index 0000000000..274fa20b5f --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixVectorProductTest_CSRHybrid.cpp - description + ------------------- + begin : Jun 9, 2021 + copyright : (C) 2021 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixVectorProductTest_CSRHybrid.h" diff --git a/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cu b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cu new file mode 120000 index 0000000000..68e56b2ee0 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.cu @@ -0,0 +1 @@ +SparseMatrixVectorProductTest_CSRLight.cpp \ No newline at end of file diff --git a/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.h b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.h new file mode 100644 index 0000000000..eef049eacf --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixVectorProductTest_CSRLight.h @@ -0,0 +1,46 @@ +/*************************************************************************** + SparseMatrixVectorProductTest_CSRLight.h - description + ------------------- + begin : Jun 9, 2021 + copyright : (C) 2021 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <iostream> +#include <TNL/Algorithms/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrix.h> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +const char* saveAndLoadFileName = "test_SparseMatrixTest_CSRLight_segments"; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight > +#endif +>; + +#endif + +#include "SparseMatrixVectorProductTest.h" +#include "../main.h" -- GitLab