Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • GEM
  • JB/mesh-orig
  • JK/execution-explosive
  • JK/tidy
  • SB/dense-matrix
  • TO/Hip
  • TO/google-benchmark
  • TO/grid-rebase
  • TO/heat-equation
  • TO/julia
  • TO/matrices
  • TO/segments
  • TSJ/matrix-operations
  • YH/convolution
  • YH/grid
  • YH/grid-final
  • YH/implement-benchmarks
  • anselm-special-log
  • backtrace2line
  • develop
  • develop_Tom_test
  • euler
  • euler2
  • euler_stable
  • lbm
  • master
26 results

Target

Select target project
No results found
Select Git revision
  • GEM
  • JB/mesh-orig
  • JK/execution-explosive
  • JK/tidy
  • SB/dense-matrix
  • TO/Hip
  • TO/google-benchmark
  • TO/grid-rebase
  • TO/heat-equation
  • TO/julia
  • TO/matrices
  • TO/segments
  • TSJ/matrix-operations
  • YH/convolution
  • YH/grid
  • YH/grid-final
  • YH/implement-benchmarks
  • anselm-special-log
  • backtrace2line
  • develop
  • develop_Tom_test
  • euler
  • euler2
  • euler_stable
  • lbm
  • master
26 results
Show changes
Commits on Source (106)
Showing
with 2500 additions and 1686 deletions
/***************************************************************************
tnl-benchmark-spmv.cpp - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-old-spmv.h"
/***************************************************************************
tnl-benchmark-spmv.cu - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-old-spmv.h"
This diff is collapsed.
......@@ -8,6 +8,8 @@
/* See Copyright Notice in tnl/Copyright */
#ifdef NOT_USED_ANYMORE
#include <TNL/Assert.h>
#include <TNL/Devices/Cuda.h>
#ifdef HAVE_CUDA
......@@ -157,3 +159,4 @@ class CusparseCSR< float > : public CusparseCSRBase< float >
} // namespace TNL
#endif
\ No newline at end of file
/***************************************************************************
tnlCusparseCSR.h - description
-------------------
begin : Jul 3, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include <TNL/Assert.h>
#include <TNL/Devices/Cuda.h>
#ifdef HAVE_CUDA
#include <cusparse.h>
#endif
namespace TNL {
/**
 * Common part of the cuSPARSE CSR wrappers.
 *
 * The object does not own the matrix: init() stores the address of the
 * matrix and of the cuSPARSE handle passed to it, so both must outlive
 * this wrapper.
 *
 * NOTE(review): the matrix descriptor created in init() is not released
 * anywhere in this class; releasing it would need a destructor together
 * with a decision about copy semantics.
 *
 * \tparam Real  Type of the matrix elements.
 */
template< typename Real >
class CusparseCSRBase
{
   public:
      typedef Real RealType;
      typedef Devices::Cuda DeviceType;
      typedef Matrices::CSR< RealType, Devices::Cuda, int > MatrixType;

      /// Creates an unbound wrapper; init() must be called before use.
      CusparseCSRBase()
      : matrix( nullptr )
#ifdef HAVE_CUDA
        , cusparseHandle( nullptr ),
        matrixDescriptor( nullptr )
#endif
      {
      }

#ifdef HAVE_CUDA
      /**
       * Binds the wrapper to a device CSR matrix and a cuSPARSE handle and
       * creates the cuSPARSE matrix descriptor.
       *
       * \param matrix          Matrix to multiply with; only its address is stored.
       * \param cusparseHandle  Pointer to an already created cuSPARSE handle;
       *                        only the pointer is stored.
       */
      void init( const MatrixType& matrix,
                 cusparseHandle_t* cusparseHandle )
      {
         this->matrix = &matrix;
         this->cusparseHandle = cusparseHandle;
         cusparseCreateMatDescr( & this->matrixDescriptor );
      }
#endif

      /// \return Number of rows of the bound matrix.
      int getRows() const
      {
         return matrix->getRows();
      }

      /// \return Number of columns of the bound matrix.
      int getColumns() const
      {
         return matrix->getColumns();
      }

      /// \return Number of matrix elements as reported by the bound matrix.
      int getNumberOfMatrixElements() const
      {
         return matrix->getNumberOfMatrixElements();
      }

      /**
       * Double-precision product computed with cusparseDcsrmv.
       *
       * The scalars are handed over by pointer and the matrix arrays are
       * reached through the public getters, in the same way as in the
       * CusparseCSR specializations. Both scalars are 1.0, so the call is
       * issued with alpha == beta == 1.
       *
       * This generic version always calls the double-precision routine; the
       * CusparseCSR< double > and CusparseCSR< float > specializations hide
       * it with precision-specific implementations.
       *
       * \param inVector   Input vector (device data).
       * \param outVector  Output vector (device data).
       */
      template< typename InVector,
                typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         const double one = 1.0;
         cusparseDcsrmv( *( this->cusparseHandle ),
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         this->matrix->getRows(),
                         this->matrix->getColumns(),
                         this->matrix->getValues().getSize(),
                         &one,
                         this->matrixDescriptor,
                         this->matrix->getValues().getData(),
                         this->matrix->getRowPointers().getData(),
                         this->matrix->getColumnIndexes().getData(),
                         inVector.getData(),
                         &one,
                         outVector.getData() );
#endif
      }

   protected:

      // Non-owning pointer to the matrix passed to init().
      const MatrixType* matrix;
#ifdef HAVE_CUDA
      // Non-owning pointer to the caller's cuSPARSE handle.
      cusparseHandle_t* cusparseHandle;

      // Descriptor created by init().
      cusparseMatDescr_t matrixDescriptor;
#endif
};
/**
 * Primary template of the cuSPARSE CSR wrapper.
 *
 * It is intentionally empty; the usable functionality is provided by the
 * explicit specializations for particular element types.
 */
template< typename Real >
class CusparseCSR
{
};
/**
 * Double-precision cuSPARSE CSR wrapper.
 */
template<>
class CusparseCSR< double > : public CusparseCSRBase< double >
{
   public:

      /**
       * Calls cusparseDcsrmv on the bound matrix.
       *
       * The same scalar (1.0) is passed as both alpha and beta.
       * Without HAVE_CUDA only the initialization assert is performed.
       *
       * \param inVector   Input vector (device data).
       * \param outVector  Output vector (device data).
       */
      template< typename InVector, typename OutVector >
      void vectorProduct( const InVector& inVector, OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         // cuSPARSE reads its scalars through pointers
         double unit = 1.0;
         const MatrixType& csr = *( this->matrix );
         cusparseDcsrmv( *( this->cusparseHandle ),
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         csr.getRows(),
                         csr.getColumns(),
                         csr.getValues().getSize(),
                         &unit,
                         this->matrixDescriptor,
                         csr.getValues().getData(),
                         csr.getRowPointers().getData(),
                         csr.getColumnIndexes().getData(),
                         inVector.getData(),
                         &unit,
                         outVector.getData() );
#endif
      }
};
/**
 * Single-precision cuSPARSE CSR wrapper.
 */
template<>
class CusparseCSR< float > : public CusparseCSRBase< float >
{
   public:

      /**
       * Calls cusparseScsrmv on the bound matrix.
       *
       * The same scalar (1.0) is passed as both alpha and beta.
       * Without HAVE_CUDA only the initialization assert is performed.
       *
       * \param inVector   Input vector (device data).
       * \param outVector  Output vector (device data).
       */
      template< typename InVector, typename OutVector >
      void vectorProduct( const InVector& inVector, OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         // cuSPARSE reads its scalars through pointers
         float unit = 1.0;
         const MatrixType& csr = *( this->matrix );
         cusparseScsrmv( *( this->cusparseHandle ),
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         csr.getRows(),
                         csr.getColumns(),
                         csr.getValues().getSize(),
                         &unit,
                         this->matrixDescriptor,
                         csr.getValues().getData(),
                         csr.getRowPointers().getData(),
                         csr.getColumnIndexes().getData(),
                         inVector.getData(),
                         &unit,
                         outVector.getData() );
#endif
      }
};
} // namespace TNL
\ No newline at end of file
/***************************************************************************
spmv.h - description
-------------------
begin : Dec 30, 2018
copyright : (C) 2015 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Lukas Cejka
// Originally implemented by J. Klinkovsky in Benchmarks/BLAS
// This is an edited copy of Benchmarks/BLAS/spmv.h by: Lukas Cejka
#pragma once
#include "../Benchmarks.h"
#include <TNL/Pointers/DevicePointer.h>
#include <TNL/Matrices/CSR.h>
#include <TNL/Matrices/Ellpack.h>
#include <TNL/Matrices/SlicedEllpack.h>
#include <TNL/Matrices/ChunkedEllpack.h>
#include <TNL/Matrices/AdEllpack.h>
#include <TNL/Matrices/BiEllpack.h>
#include <TNL/Matrices/MatrixReader.h>
using namespace TNL::Matrices;
#include "cusparseCSRMatrix.h"
namespace TNL {
namespace Benchmarks {
// Alias to match the number of template parameters with other formats
template< typename Real, typename Device, typename Index >
using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >;
/**
 * Get the name (with extension) of the input matrix file, i.e. the part of
 * the path following the last '/' or '\\'. A path without any separator is
 * returned unchanged.
 *
 * The function is defined in a header, so it is marked inline to keep a
 * single definition when the header is included from several translation
 * units.
 *
 * \param InputFileName  Path to the matrix file.
 * \return File name without the leading directories.
 */
inline std::string getMatrixFileName( const String& InputFileName )
{
    std::string fileName = InputFileName;

    const size_t last_slash_idx = fileName.find_last_of( "/\\" );
    if( std::string::npos != last_slash_idx )
        fileName.erase( 0, last_slash_idx + 1 );

    return fileName;
}
/**
 * Get only the name of the format from getType().
 *
 * The template argument list (everything from the first '<') is dropped and
 * the text up to and including the first "::"-style separator is removed,
 * e.g. "Matrices::CSR< double, ... >" gives "CSR".
 *
 * Unlike a plain `find( ':' ) + 2`, a type name without any ':' is returned
 * whole (instead of silently losing its first character) and a name ending
 * right after the separator yields an empty string instead of throwing.
 *
 * \param matrix  Matrix whose type string is examined.
 * \return Name of the matrix format.
 */
template< typename Matrix >
std::string getMatrixFormat( const Matrix& matrix )
{
    const std::string fullType = getType( matrix );
    const std::string qualifiedName = fullType.substr( 0, fullType.find( '<' ) );

    const std::string::size_type separator = qualifiedName.find( ':' );
    if( separator == std::string::npos )
        return qualifiedName;

    // skip both characters of the "::" separator
    const std::string::size_type nameBegin = separator + 2;
    if( nameBegin > qualifiedName.size() )
        return std::string();

    return qualifiedName.substr( nameBegin );
}
/**
 * Print information about the matrix: its format name, the number of rows
 * and columns and the number of nonzero elements, one item per line.
 *
 * \param matrix  Matrix to describe.
 * \param str     Output stream receiving the description.
 */
template< typename Matrix >
void printMatrixInfo( const Matrix& matrix,
                      std::ostream& str )
{
    str << "\n Format: ";
    str << getMatrixFormat( matrix ) << std::endl;

    str << " Rows: ";
    str << matrix.getRows() << std::endl;

    str << " Cols: ";
    str << matrix.getColumns() << std::endl;

    str << " Nonzero Elements: ";
    str << matrix.getNumberOfNonzeroMatrixElements() << std::endl;
}
/**
 * Benchmarks the sparse matrix-vector product for one matrix format.
 *
 * The matrix is read from a MatrixMarket file twice: once as CSR (input for
 * cuSPARSE) and once in the format given by the Matrix template parameter.
 * The product is then timed on the host and, with HAVE_CUDA, on the GPU for
 * the given format and for cuSPARSE. Finally the differences between the
 * results are printed to the standard output.
 *
 * \tparam Real    Type of the matrix elements.
 * \tparam Matrix  Matrix format template to be benchmarked.
 * \tparam Vector  Vector template; not used by the visible implementation.
 *
 * \param benchmark      Benchmark object collecting the measurements.
 * \param inputFileName  Path to the MatrixMarket file.
 * \param verboseMR      Verbosity flag forwarded to the matrix reader.
 * \return false when the matrix cannot be read (reader failure or
 *         std::bad_alloc) or the cuSPARSE handle cannot be created,
 *         true otherwise.
 */
template< typename Real,
          template< typename, typename, typename > class Matrix,
          template< typename, typename, typename, typename > class Vector = Containers::Vector >
bool
benchmarkSpMV( Benchmark& benchmark,
               const String& inputFileName,
               bool verboseMR )
{
    // Setup CSR for cuSPARSE. It will be compared to the format given as a template parameter to this function
    typedef Matrices::CSR< Real, Devices::Host, int > CSR_HostMatrix;
    typedef Matrices::CSR< Real, Devices::Cuda, int > CSR_DeviceMatrix;

    CSR_HostMatrix CSRhostMatrix;
    CSR_DeviceMatrix CSRdeviceMatrix;

    // Read the matrix for CSR, to set up cuSPARSE.
    // A failed read and an allocation failure both end the benchmark of this format.
    try
    {
        if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) )
            return false;
    }
    catch( const std::bad_alloc& e )
    {
        std::cerr << "Unable to load the matrix in the CSR format: " << e.what() << std::endl;
        return false;
    }

#ifdef HAVE_CUDA
    // Owner of the cuSPARSE handle: the handle is destroyed on every way out
    // of this function (early returns and exceptions included).
    struct CusparseHandleOwner
    {
        cusparseHandle_t handle = nullptr;
        bool created = false;

        CusparseHandleOwner()
        : created( cusparseCreate( &handle ) == CUSPARSE_STATUS_SUCCESS )
        {}

        ~CusparseHandleOwner()
        {
            if( created )
                cusparseDestroy( handle );
        }

        CusparseHandleOwner( const CusparseHandleOwner& ) = delete;
        CusparseHandleOwner& operator=( const CusparseHandleOwner& ) = delete;
    };

    // cuSPARSE handle setup
    CusparseHandleOwner cusparse;
    if( ! cusparse.created )
    {
        std::cerr << "Unable to create the cuSPARSE handle." << std::endl;
        return false;
    }

    // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
    CSRdeviceMatrix = CSRhostMatrix;

    // Delete the CSRhostMatrix, so it doesn't take up unnecessary space
    CSRhostMatrix.reset();

    // Initialize the cusparseCSR matrix.
    // It is declared after the handle owner, so it never outlives the handle.
    TNL::CusparseCSR< Real > cusparseCSR;
    cusparseCSR.init( CSRdeviceMatrix, &cusparse.handle );
#endif

    // Setup the format which is given as a template parameter to this function
    typedef Matrix< Real, Devices::Host, int > HostMatrix;
    typedef Matrix< Real, Devices::Cuda, int > DeviceMatrix;
    typedef Containers::Vector< Real, Devices::Host, int > HostVector;
    typedef Containers::Vector< Real, Devices::Cuda, int > CudaVector;

    HostMatrix hostMatrix;
    DeviceMatrix deviceMatrix;
    HostVector hostVector, hostVector2;
    CudaVector deviceVector, deviceVector2;

    // Load the format
    try
    {
        if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) )
            return false;
    }
    catch( const std::bad_alloc& e )
    {
        std::cerr << "Unable to load the matrix in the benchmarked format: " << e.what() << std::endl;
        return false;
    }

    // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS),
    // because we need the matrix loaded first to get the rows and columns
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
        { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
        { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
        { "rows", convertToString( hostMatrix.getRows() ) },
        { "columns", convertToString( hostMatrix.getColumns() ) },
        { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) }
    } ));

    hostVector.setSize( hostMatrix.getColumns() );
    hostVector2.setSize( hostMatrix.getRows() );

#ifdef HAVE_CUDA
    deviceMatrix = hostMatrix;
    deviceVector.setSize( hostMatrix.getColumns() );
    deviceVector2.setSize( hostMatrix.getRows() );
#endif

    // reset function
    auto reset = [&]() {
        hostVector.setValue( 1.0 );
        hostVector2.setValue( 0.0 );
#ifdef HAVE_CUDA
        deviceVector.setValue( 1.0 );
        deviceVector2.setValue( 0.0 );
#endif
    };

    const int elements = hostMatrix.getNumberOfNonzeroMatrixElements();

    // per nonzero element: the value, the matching input-vector value and the column index
    const double datasetSize = (double) elements * ( 2 * sizeof( Real ) + sizeof( int ) ) / oneGB;

    // compute functions
    auto spmvHost = [&]() {
        hostMatrix.vectorProduct( hostVector, hostVector2 );
    };
#ifdef HAVE_CUDA
    auto spmvCuda = [&]() {
        deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
    };

    auto spmvCusparse = [&]() {
        cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
    };
#endif

    benchmark.setOperation( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );

    // Initialize the host vector to be compared.
    // (The values in hostVector2 will be reset when spmvCuda starts)
    HostVector resultHostVector2;
    resultHostVector2.setSize( hostVector2.getSize() );
    resultHostVector2.setValue( 0.0 );

    // Copy the values
    resultHostVector2 = hostVector2;

    // Setup cuSPARSE MetaData, since it has the same header as CSR,
    // and therefore will not get its own headers (rows, cols, speedup etc.) in log.
    // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
    // NOTE(review): this metadata is set before the GPU run of the benchmarked
    // format below, not only before the cuSPARSE run - confirm that this is
    // what the Benchmark logging expects.
    benchmark.setMetadataColumns( Benchmark::MetadataColumns({
        { "matrix name", convertToString( getMatrixFileName( inputFileName ) ) },
        { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) },
        { "rows", convertToString( hostMatrix.getRows() ) },
        { "columns", convertToString( hostMatrix.getColumns() ) },
        { "matrix format", convertToString( "CSR-cuSPARSE" ) }
    } ));

#ifdef HAVE_CUDA
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );

    // Initialize the device vector to be compared.
    // (The values in deviceVector2 will be reset when spmvCusparse starts)
    HostVector resultDeviceVector2;
    resultDeviceVector2.setSize( deviceVector2.getSize() );
    resultDeviceVector2.setValue( 0.0 );

    resultDeviceVector2 = deviceVector2;

    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );

    HostVector resultcuSPARSEDeviceVector2;
    resultcuSPARSEDeviceVector2.setSize( deviceVector2.getSize() );
    resultcuSPARSEDeviceVector2.setValue( 0.0 );

    resultcuSPARSEDeviceVector2 = deviceVector2;

    // Difference between GPU (current format) and GPU-cuSPARSE results
    const Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) );
    const Real cuSparseDifferenceLpNorm = lpNorm( resultDeviceVector2 - resultcuSPARSEDeviceVector2, 1 );

    const std::string GPUxGPUcuSparse_resultDifferenceAbsMax = "GPUxGPUcuSPARSE differenceAbsMax = " + std::to_string( cuSparseDifferenceAbsMax );
    const std::string GPUxGPUcuSparse_resultDifferenceLpNorm = "GPUxGPUcuSPARSE differenceLpNorm = " + std::to_string( cuSparseDifferenceLpNorm );

    // Difference between CPU and GPU results for the current format
    const Real differenceAbsMax = max( abs( resultHostVector2 - resultDeviceVector2 ) );
    const Real differenceLpNorm = lpNorm( resultHostVector2 - resultDeviceVector2, 1 );

    const std::string CPUxGPU_resultDifferenceAbsMax = "CPUxGPU differenceAbsMax = " + std::to_string( differenceAbsMax );
    const std::string CPUxGPU_resultDifferenceLpNorm = "CPUxGPU differenceLpNorm = " + std::to_string( differenceLpNorm );

    // Print result differences of CPU and GPU of current format
    std::cout << CPUxGPU_resultDifferenceAbsMax << std::endl;
    std::cout << CPUxGPU_resultDifferenceLpNorm << std::endl;

    // Print result differences of GPU of current format and GPU with cuSPARSE.
    std::cout << GPUxGPUcuSparse_resultDifferenceAbsMax << std::endl;
    std::cout << GPUxGPUcuSparse_resultDifferenceLpNorm << std::endl;
#endif

    std::cout << std::endl;
    return true;
}
/**
 * Runs the SpMV benchmark of one input matrix for all supported formats.
 *
 * Every format is always benchmarked, even when an earlier one failed. The
 * result starts as true and is combined with `&=`, so a failure of any
 * format is reported to the caller (starting from true and combining with
 * `|=` could never yield false).
 *
 * \tparam Real   Type of the matrix elements.
 * \tparam Index  Index type; not used by the visible implementation.
 *
 * \param benchmark      Benchmark object collecting the measurements.
 * \param inputFileName  Path to the MatrixMarket file.
 * \param verboseMR      Verbosity flag forwarded to the matrix reader.
 * \return true when the benchmark of every format succeeded.
 */
template< typename Real = double,
          typename Index = int >
bool
benchmarkSpmvSynthetic( Benchmark& benchmark,
                        const String& inputFileName,
                        bool verboseMR )
{
    bool result = true;
    // `&=` on bool evaluates its right-hand side unconditionally, so no format is skipped
    result &= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
    result &= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
    result &= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR );
    result &= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );

    // AdEllpack is broken
    // result &= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR );
    result &= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR );
    return result;
}
} // namespace Benchmarks
} // namespace TNL
/***************************************************************************
tnl-benchmark-spmv.cpp - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
begin : March 3, 2019
copyright : (C) 2019 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-spmv.h"
/***************************************************************************
tnl-benchmark-spmv.cu - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
begin : March 3, 2019
copyright : (C) 2019 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-spmv.h"
This diff is collapsed.
......@@ -12,31 +12,32 @@ BENCHMARK_DBG="tnl-benchmark-spmv-dbg"
export CUDA_PROFILE_CONFIG="$IWD/cuda-profiler.conf"
PROCESS_CUDA_PROFILE="$IWD/process-cuda-profile.pl"
source matrix-market
#source matrix-market
source florida-matrix-market
for link in $MM_MATRICES;
do
echo "======================================================================================================"
matrix=matrices`echo $link | sed 's/ftp:\/\/math.nist.gov\/pub//'`
unzipped_matrix=`echo $matrix | sed 's/.gz//'`
if test ! -e $matrix;
then
echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first."
#echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first." >> sparse-matrix-benchmark.log
else
gunzip -c ${matrix} > ${unzipped_matrix}
echo "Benchmarking with the matrix $unzipped_matrix ..."
export CUDA_PROFILE_LOG=$unzipped_matrix.float.log
if test x$DEBUG = xyes;
then
gdb --args ${BENCHMARK_DBG} --test mtx --input-file $unzipped_matrix --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
else
$BENCHMARK --test mtx --input-file $unzipped_matrix --pdf-file $unzipped_matrix.pdf --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
fi
#perl $PROCESS_CUDA_PROFILE $unzipped_matrix.float.log sparse-matrix-profiling-float.log
fi
done
# !!!Matrices in MatrixMarket2 don't load properly, formatting issues with every file. MatrixReader fails.
#for link in $MM_MATRICES;
#do
# echo "======================================================================================================"
# matrix=matrices`echo $link | sed 's/ftp:\/\/math.nist.gov\/pub//'`
# unzipped_matrix=`echo $matrix | sed 's/.gz//'`
# if test ! -e $matrix;
# then
# echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first."
# #echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first." >> sparse-matrix-benchmark.log
# else
# gunzip -c ${matrix} > ${unzipped_matrix}
# echo "Benchmarking with the matrix $unzipped_matrix ..."
# export CUDA_PROFILE_LOG=$unzipped_matrix.float.log
# if test x$DEBUG = xyes;
# then
# gdb --args ${BENCHMARK_DBG} --input-file $unzipped_matrix --log-file sparse-matrix-benchmark.log --verbose 1
# else
# $BENCHMARK --input-file $unzipped_matrix --log-file sparse-matrix-benchmark.log --verbose 1
# fi
# #perl $PROCESS_CUDA_PROFILE $unzipped_matrix.float.log sparse-matrix-profiling-float.log
# fi
#done
for link in $FLORIDA_MM_MATRICES;
do
......@@ -51,17 +52,23 @@ do
cd $DIRNAME
tar zxvf $FILENAME
cd $IWD
if [ ! -d "log-files" ];
then
mkdir log-files
fi
SUBDIRNAME=`echo $FILENAME | sed 's/.tar.gz//'`
rm -f $DIRNAME/$SUBDIRNAME/*_b.mtx # these are usualy in array format
for file in $DIRNAME/$SUBDIRNAME/*.mtx;
do
echo "======================================================================================================"
echo "Benchmarking with the matrix $file ..."
mtx_file_name=`basename $file`
mtx_file_name=${mtx_file_name%.mtx}
if test x$DEBUG = xyes;
then
gdb --args $BENCHMARK --test mtx --input-file $file --pdf-file $file.pdf --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
gdb --args $BENCHMARK --input-file $file --log-file log-files/sparse-matrix-benchmark.log --output-mode append --verbose 1
else
$BENCHMARK --test mtx --input-file $file --pdf-file $file.pdf --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
$BENCHMARK --input-file $file --log-file log-files/sparse-matrix-benchmark.log --output-mode append --verbose 1
fi
done
fi
......
......@@ -27,56 +27,94 @@ namespace Matrices {
template< typename Device >
class AdEllpackDeviceDependentCode;
template< typename MatrixType >
struct warpInfo
{
int offset;
int rowOffset;
int localLoad;
int reduceMap[ 32 ];
warpInfo* next;
warpInfo* previous;
using RealType = typename MatrixType::RealType;
using DeviceType = typename MatrixType::DeviceType;
using IndexType = typename MatrixType::IndexType;
IndexType offset;
IndexType rowOffset;
IndexType localLoad;
IndexType reduceMap[ 32 ];
warpInfo< MatrixType >* next;
warpInfo< MatrixType >* previous;
};
template< typename MatrixType >
class warpList
{
public:
using RealType = typename MatrixType::RealType;
using DeviceType = typename MatrixType::DeviceType;
using IndexType = typename MatrixType::IndexType;
warpList();
bool addWarp( const int offset,
const int rowOffset,
const int localLoad,
const int* reduceMap );
bool addWarp( const IndexType offset,
const IndexType rowOffset,
const IndexType localLoad,
const IndexType* reduceMap );
warpInfo* splitInHalf( warpInfo* warp );
warpInfo< MatrixType >* splitInHalf( warpInfo< MatrixType >* warp );
int getNumberOfWarps()
IndexType getNumberOfWarps()
{ return this->numberOfWarps; }
warpInfo* getNextWarp( warpInfo* warp )
warpInfo< MatrixType >* getNextWarp( warpInfo< MatrixType >* warp )
{ return warp->next; }
warpInfo* getHead()
warpInfo< MatrixType >* getHead()
{ return this->head; }
warpInfo* getTail()
warpInfo< MatrixType >* getTail()
{ return this->tail; }
~warpList();
void printList()
{
if( this->getHead() == this->getTail() )
std::cout << "HEAD==TAIL" << std::endl;
else
{
for( warpInfo< MatrixType >* i = this->getHead(); i != this->getTail()->next; i = i->next )
{
if( i == this->getHead() )
std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
else if( i == this->getTail() )
std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
else
std::cout << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
}
std::cout << std::endl;
}
}
private:
int numberOfWarps;
IndexType numberOfWarps;
warpInfo* head;
warpInfo* tail;
warpInfo< MatrixType >* head;
warpInfo< MatrixType >* tail;
};
template< typename Real, typename Device, typename Index >
class AdEllpack : public Sparse< Real, Device, Index >
{
private:
// convenient template alias for controlling the selection of copy-assignment operator
template< typename Device2 >
using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
// friend class will be needed for templated assignment operators
template< typename Real2, typename Device2, typename Index2 >
friend class AdEllpack;
public:
typedef Real RealType;
......@@ -102,9 +140,15 @@ public:
IndexType getRowLength( const IndexType row ) const;
template< typename Real2, typename Device2, typename Index2 >
bool setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
void reset();
template< typename Real2, typename Device2, typename Index2 >
bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
template< typename Real2, typename Device2, typename Index2 >
bool operator != ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
bool setElement( const IndexType row,
const IndexType column,
......@@ -142,7 +186,15 @@ public:
typename OutVector >
void vectorProduct( const InVector& inVector,
OutVector& outVector ) const;
// copy assignment
AdEllpack& operator=( const AdEllpack& matrix );
// cross-device copy assignment
template< typename Real2, typename Device2, typename Index2,
typename = typename Enabler< Device2 >::type >
AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
void save( File& file ) const;
void load( File& file );
......@@ -155,13 +207,13 @@ public:
bool balanceLoad( const RealType average,
ConstCompressedRowLengthsVectorView rowLengths,
warpList* list );
warpList< AdEllpack >* list );
void computeWarps( const IndexType SMs,
const IndexType threadsPerSM,
warpList* list );
warpList< AdEllpack >* list );
bool createArrays( warpList* list );
bool createArrays( warpList< AdEllpack >* list );
void performRowTest();
......
This diff is collapsed.
......@@ -28,9 +28,19 @@ namespace TNL {
template< typename Device >
class BiEllpackDeviceDependentCode;
template< typename Real, typename Device = Devices::Cuda, typename Index = int, int StripSize = 32 >
template< typename Real, typename Device, typename Index >
class BiEllpack : public Sparse< Real, Device, Index >
{
private:
// convenient template alias for controlling the selection of copy-assignment operator
template< typename Device2 >
using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
// friend class will be needed for templated assignment operators
template< typename Real2, typename Device2, typename Index2 >
friend class BiEllpack;
public:
typedef Real RealType;
typedef Device DeviceType;
......@@ -57,7 +67,15 @@ public:
template< typename Real2,
typename Device2,
typename Index2 >
bool setLike( const BiEllpack< Real2, Device2, Index2, StripSize >& matrix );
void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix );
void reset();
template< typename Real2, typename Device2, typename Index2 >
bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
template< typename Real2, typename Device2, typename Index2 >
bool operator != ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
void getRowLengths( CompressedRowLengthsVector& rowLengths ) const;
......@@ -124,8 +142,14 @@ public:
IndexType getNumberOfGroups( const IndexType row ) const;
bool vectorProductTest() const;
// copy assignment
BiEllpack& operator=( const BiEllpack& matrix );
void reset();
// cross-device copy assignment
template< typename Real2, typename Device2, typename Index2,
typename = typename Enabler< Device2 >::type >
BiEllpack& operator=( const BiEllpack< Real2, Device2, Index2 >& matrix );
void save( File& file ) const;
......@@ -136,11 +160,13 @@ public:
void load( const String& fileName );
void print( std::ostream& str ) const;
void printValues() const;
void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths );
void computeColumnSizes( Containers::Vector< Index, Device, Index >& tempRowLengths );
// void verifyRowLengths( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths );
// void verifyRowLengths( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths );
template< typename InVector,
typename OutVector >
......@@ -157,11 +183,11 @@ public:
IndexType getStripLength( const IndexType strip ) const;
__cuda_callable__
void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths,
const IndexType strip );
__cuda_callable__
void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths,
const IndexType numberOfStrips,
const IndexType strip );
......@@ -171,6 +197,8 @@ public:
typedef BiEllpackDeviceDependentCode< DeviceType > DeviceDependentCode;
friend class BiEllpackDeviceDependentCode< DeviceType >;
friend class BiEllpack< RealType, Devices::Host, IndexType >;
friend class BiEllpack< RealType, Devices::Cuda, IndexType >;
private:
......
......@@ -45,6 +45,30 @@ BiEllpackSymmetric< Real, Device, Index, StripSize >::BiEllpackSymmetric()
logWarpSize( 5 )
{}
// Builds the type string "Matrices::BiEllpackMatrix< Real, Device, Index >"
// from the names of the template arguments.
template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType()
{
   const String realName = String( TNL::getType< Real >() );
   const String deviceName = String( Device :: getDeviceType() );
   const String indexName = String( TNL::getType< Index >() );
   const String separator = String( ", " );

   return String( "Matrices::BiEllpackMatrix< ") +
          realName +
          separator +
          deviceName +
          separator +
          indexName +
          String( " >" );
}
// Returns the same string as getType().
template< typename Real,
          typename Device,
          typename Index,
          int StripSize >
String BiEllpackSymmetric< Real, Device, Index, StripSize >::getTypeVirtual() const
{
   return BiEllpackSymmetric< Real, Device, Index, StripSize >::getType();
}
template< typename Real,
typename Device,
typename Index,
......
This diff is collapsed.
......@@ -27,6 +27,28 @@ COOMatrix< Real, Device, Index >::COOMatrix()
{
};
// Builds the type string "Matrices::COOMatrix< Real, Device, Index >"
// from the names of the template arguments.
template< typename Real,
          typename Device,
          typename Index >
String COOMatrix< Real, Device, Index >::getType()
{
   const String realName = String( TNL::getType< Real>() );
   const String deviceName = String( Device :: getDeviceType() );
   const String indexName = String( TNL::getType< Index >() );
   const String separator = String( ", " );

   return String( "Matrices::COOMatrix< " ) +
          realName +
          separator +
          deviceName +
          separator +
          indexName +
          String( " >" );
}
// Returns the same string as getType().
template< typename Real,
          typename Device,
          typename Index >
String COOMatrix< Real, Device, Index >::getTypeVirtual() const
{
   return COOMatrix< Real, Device, Index >::getType();
}
template< typename Real,
typename Device,
typename Index >
......
......@@ -45,8 +45,8 @@ String CSR< Real, Device, Index >::getSerializationType()
{
return String( "Matrices::CSR< ") +
TNL::getType< Real>() +
String( ", " ) +
getType< Devices::Host >() +
", [any_device], " +
String( TNL::getType< Index >() ) +
String( " >" );
}
......@@ -122,41 +122,8 @@ template< typename Real,
Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) const
{
// TODO: Fix/Implement
throw Exceptions::NotImplementedError( "CSR::getNonZeroRowLength is not implemented." );
// if( std::is_same< DeviceType, Devices::Host >::value )
// {
// ConstMatrixRow matrixRow = this->getRow( row );
// return matrixRow.getNonZeroElementsCount();
// }
// if( std::is_same< DeviceType, Devices::Cuda >::value )
// {
// IndexType *cols = new IndexType[4];
// RealType *vals = new RealType[4];
// for( int i = 0; i < 4; i++ )
// {
// cols[i] = i;
// vals[i] = 1.0;
// }
// ConstMatrixRow matrixRow(cols, vals, 4, 1);
// // ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
// // WHEN debugging with GDB:
// // (gdb) p this->rowPointers[0]
// // Could not find operator[].
// // (gdb) p rowPointers.getElement(0)
// // Attempt to take address of value not located in memory.
// IndexType resultHost ( 0 );
// IndexType* resultCuda = Cuda::passToDevice( resultHost );
// // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
// // 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
// TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
// delete []cols;
// delete []vals;
// std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
// resultHost = Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
// std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
// Cuda::freeFromDevice( resultCuda );
// return resultHost;
// }
TNL_ASSERT( false, std::cerr << "TODO: Fix/Implement" );
return 0;
}
template< typename Real,
......@@ -221,13 +188,6 @@ bool CSR< Real, Device, Index >::addElementFast( const IndexType row,
const RealType& value,
const RealType& thisElementMultiplicator )
{
/*TNL_ASSERT( row >= 0 && row < this->rows &&
column >= 0 && column <= this->rows,
std::cerr << " row = " << row
<< " column = " << column
<< " this->rows = " << this->rows
<< " this->columns = " << this-> columns );*/
IndexType elementPtr = this->rowPointers[ row ];
const IndexType rowEnd = this->rowPointers[ row + 1 ];
IndexType col = 0;
......
......@@ -75,6 +75,11 @@ public:
typedef tnlChunkedEllpackSliceInfo< IndexType > ChunkedEllpackSliceInfo;
typedef typename Sparse< RealType, DeviceType, IndexType >:: CompressedRowLengthsVector CompressedRowLengthsVector;
typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView;
typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector;
typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector;
typedef ChunkedEllpack< Real, Device, Index > ThisType;
typedef ChunkedEllpack< Real, Devices::Host, Index > HostType;
typedef ChunkedEllpack< Real, Devices::Cuda, Index > CudaType;
typedef Sparse< Real, Device, Index > BaseType;
typedef typename BaseType::MatrixRow MatrixRow;
typedef SparseRow< const RealType, const IndexType > ConstMatrixRow;
......
This diff is collapsed.