...
 
Commits (103)
/***************************************************************************
tnl-benchmark-spmv.cpp - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-old-spmv.h"
/***************************************************************************
tnl-benchmark-spmv.cu - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-old-spmv.h"
This diff is collapsed.
/***************************************************************************
tnlCusparseCSR.h - description
-------------------
begin : Jul 3, 2014
copyright : (C) 2014 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#ifdef NOT_USED_ANYMORE
#include <TNL/Assert.h>
#include <TNL/Devices/Cuda.h>
#ifdef HAVE_CUDA
#include <cusparse.h>
#endif
namespace TNL {
/**
 * \brief Non-owning adapter that multiplies a TNL CSR matrix living on the
 *        CUDA device by a vector using the cuSPARSE csrmv routines.
 *
 * The object stores only pointers to the matrix and to the cuSPARSE handle
 * given to init(); the caller has to keep both alive while the adapter is
 * in use.
 *
 * \tparam Real type of the matrix elements. The csrmv dispatch below covers
 *              double and float.
 */
template< typename Real >
class CusparseCSRBase
{
   public:
      typedef Real RealType;
      typedef Devices::Cuda DeviceType;
      typedef Matrices::CSR< RealType, Devices::Cuda, int > MatrixType;

      /** Creates an adapter bound to no matrix; init() must be called before use. */
      CusparseCSRBase()
      : matrix( nullptr )
#ifdef HAVE_CUDA
        , cusparseHandle( nullptr ),
        matrixDescriptor()
#endif
      {
      }

#ifdef HAVE_CUDA
      /**
       * Binds the adapter to \e matrix and \e cusparseHandle and creates a
       * cuSPARSE matrix descriptor.
       *
       * NOTE(review): the descriptor is never destroyed by this class and a
       * new one is created on every call; the status returned by
       * cusparseCreateMatDescr is ignored.
       */
      void init( const MatrixType& matrix,
                 cusparseHandle_t* cusparseHandle )
      {
         this->matrix = &matrix;
         this->cusparseHandle = cusparseHandle;
         cusparseCreateMatDescr( & this->matrixDescriptor );
      }
#endif

      /** \return number of rows of the bound matrix (init() must have been called). */
      int getRows() const
      {
         return matrix->getRows();
      }

      /** \return number of columns of the bound matrix (init() must have been called). */
      int getColumns() const
      {
         return matrix->getColumns();
      }

      /** \return value of getNumberOfMatrixElements() of the bound matrix. */
      int getNumberOfMatrixElements() const
      {
         return matrix->getNumberOfMatrixElements();
      }

      /**
       * Runs the cuSPARSE csrmv routine on the bound matrix.
       *
       * alpha and beta are both 1, so according to the cuSPARSE csrmv
       * contract (y = alpha * A * x + beta * y) the product is added to the
       * current content of \e outVector.
       *
       * Without HAVE_CUDA the method only checks that init() was called.
       *
       * \param inVector  vector x; getData() must return a device pointer to RealType.
       * \param outVector vector y; getData() must return a device pointer to RealType.
       */
      template< typename InVector,
                typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         // cuSPARSE takes the scalars by pointer, not by value.
         const RealType one = 1.0;
         // The matrix arrays are read through the public getters, the same
         // way the CusparseCSR specializations do.
         csrmv( *( this->cusparseHandle ),
                CUSPARSE_OPERATION_NON_TRANSPOSE,
                this->matrix->getRows(),
                this->matrix->getColumns(),
                this->matrix->getValues().getSize(),
                &one,
                this->matrixDescriptor,
                this->matrix->getValues().getData(),
                this->matrix->getRowPointers().getData(),
                this->matrix->getColumnIndexes().getData(),
                inVector.getData(),
                &one,
                outVector.getData() );
#endif
      }

   protected:

#ifdef HAVE_CUDA
      /** Double precision csrmv; selected by overload resolution when RealType is double. */
      static cusparseStatus_t csrmv( cusparseHandle_t handle,
                                     cusparseOperation_t operation,
                                     int rows,
                                     int columns,
                                     int nonzeros,
                                     const double* alpha,
                                     const cusparseMatDescr_t descriptor,
                                     const double* values,
                                     const int* rowPointers,
                                     const int* columnIndexes,
                                     const double* x,
                                     const double* beta,
                                     double* y )
      {
         return cusparseDcsrmv( handle, operation, rows, columns, nonzeros,
                                alpha, descriptor, values, rowPointers, columnIndexes,
                                x, beta, y );
      }

      /** Single precision csrmv; selected by overload resolution when RealType is float. */
      static cusparseStatus_t csrmv( cusparseHandle_t handle,
                                     cusparseOperation_t operation,
                                     int rows,
                                     int columns,
                                     int nonzeros,
                                     const float* alpha,
                                     const cusparseMatDescr_t descriptor,
                                     const float* values,
                                     const int* rowPointers,
                                     const int* columnIndexes,
                                     const float* x,
                                     const float* beta,
                                     float* y )
      {
         return cusparseScsrmv( handle, operation, rows, columns, nonzeros,
                                alpha, descriptor, values, rowPointers, columnIndexes,
                                x, beta, y );
      }
#endif

      // Matrix bound by init(); not owned.
      const MatrixType* matrix;
#ifdef HAVE_CUDA
      // cuSPARSE handle supplied by the caller of init(); not owned.
      cusparseHandle_t* cusparseHandle;

      // Descriptor created in init().
      cusparseMatDescr_t matrixDescriptor;
#endif
};
/**
 * Primary template of the cuSPARSE CSR adapter. It is intentionally empty;
 * the usable implementations are the explicit specializations for the
 * supported element types.
 */
template< typename Real >
class CusparseCSR
{
};
/**
 * Double precision cuSPARSE CSR adapter built on CusparseCSRBase< double >.
 */
template<>
class CusparseCSR< double > : public CusparseCSRBase< double >
{
   public:

      /**
       * Calls cusparseDcsrmv on the bound matrix with alpha = beta = 1.
       *
       * NOTE(review): with beta == 1 the cuSPARSE csrmv contract
       * (y = alpha * A * x + beta * y) adds the product to the current
       * content of outVector - confirm this is what callers expect.
       *
       * Without HAVE_CUDA only the initialization check is performed.
       *
       * \param inVector  vector x; getData() is passed to cuSPARSE as is.
       * \param outVector vector y; getData() is passed to cuSPARSE as is.
       */
      template< typename InVector,
                typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         // One scalar serves as both alpha and beta.
         const double unit = 1.0;
         const double* const scalar = &unit;
         cusparseDcsrmv( *( this->cusparseHandle ),
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         this->matrix->getRows(),
                         this->matrix->getColumns(),
                         this->matrix->getValues().getSize(),
                         scalar,
                         this->matrixDescriptor,
                         this->matrix->getValues().getData(),
                         this->matrix->getRowPointers().getData(),
                         this->matrix->getColumnIndexes().getData(),
                         inVector.getData(),
                         scalar,
                         outVector.getData() );
#endif
      }
};
/**
 * Single precision cuSPARSE CSR adapter built on CusparseCSRBase< float >.
 */
template<>
class CusparseCSR< float > : public CusparseCSRBase< float >
{
   public:

      /**
       * Calls cusparseScsrmv on the bound matrix with alpha = beta = 1.
       *
       * NOTE(review): with beta == 1 the cuSPARSE csrmv contract
       * (y = alpha * A * x + beta * y) adds the product to the current
       * content of outVector - confirm this is what callers expect.
       *
       * Without HAVE_CUDA only the initialization check is performed.
       *
       * \param inVector  vector x; getData() is passed to cuSPARSE as is.
       * \param outVector vector y; getData() is passed to cuSPARSE as is.
       */
      template< typename InVector,
                typename OutVector >
      void vectorProduct( const InVector& inVector,
                          OutVector& outVector ) const
      {
         TNL_ASSERT_TRUE( matrix, "matrix was not initialized" );
#ifdef HAVE_CUDA
         // One scalar serves as both alpha and beta.
         const float unit = 1.0;
         const float* const scalar = &unit;
         cusparseScsrmv( *( this->cusparseHandle ),
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         this->matrix->getRows(),
                         this->matrix->getColumns(),
                         this->matrix->getValues().getSize(),
                         scalar,
                         this->matrixDescriptor,
                         this->matrix->getValues().getData(),
                         this->matrix->getRowPointers().getData(),
                         this->matrix->getColumnIndexes().getData(),
                         inVector.getData(),
                         scalar,
                         outVector.getData() );
#endif
      }
};
} // namespace TNL
#endif
\ No newline at end of file
......@@ -155,5 +155,4 @@ class CusparseCSR< float > : public CusparseCSRBase< float >
}
};
} // namespace TNL
} // namespace TNL
\ No newline at end of file
This diff is collapsed.
/***************************************************************************
tnl-benchmark-spmv.cpp - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
begin : March 3, 2019
copyright : (C) 2019 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-spmv.h"
/***************************************************************************
tnl-benchmark-spmv.cu - description
-------------------
begin : Jun 5, 2014
copyright : (C) 2014 by Tomas Oberhuber
begin : March 3, 2019
copyright : (C) 2019 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
#include "tnl-benchmark-spmv.h"
This diff is collapsed.
......@@ -12,31 +12,32 @@ BENCHMARK_DBG="tnl-benchmark-spmv-dbg"
export CUDA_PROFILE_CONFIG="$IWD/cuda-profiler.conf"
PROCESS_CUDA_PROFILE="$IWD/process-cuda-profile.pl"
source matrix-market
#source matrix-market
source florida-matrix-market
# Benchmark every matrix listed in $MM_MATRICES: map its download link to the
# local copy under 'matrices', unpack it and run the benchmark binary on it
# (under gdb when DEBUG=yes).
for link in $MM_MATRICES;
do
   echo "======================================================================================================"
   # Local path = 'matrices' + the link with the server prefix removed.
   matrix=matrices$(echo "$link" | sed 's/ftp:\/\/math.nist.gov\/pub//')
   # Drop only a trailing '.gz'. The former sed 's/.gz//' removed the first
   # "<any character>gz" found anywhere in the path.
   unzipped_matrix=${matrix%.gz}
   if test ! -e "$matrix";
   then
      echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first."
      #echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first." >> sparse-matrix-benchmark.log
   else
      gunzip -c "${matrix}" > "${unzipped_matrix}"
      echo "Benchmarking with the matrix $unzipped_matrix ..."
      export CUDA_PROFILE_LOG="$unzipped_matrix.float.log"
      # Quoted so that an empty or multi-word DEBUG cannot break the test.
      if test "x$DEBUG" = xyes;
      then
         gdb --args ${BENCHMARK_DBG} --test mtx --input-file "$unzipped_matrix" --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
      else
         $BENCHMARK --test mtx --input-file "$unzipped_matrix" --pdf-file "$unzipped_matrix.pdf" --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
      fi
      #perl $PROCESS_CUDA_PROFILE $unzipped_matrix.float.log sparse-matrix-profiling-float.log
   fi
done
# NOTE: Matrices from MatrixMarket2 do not load properly: every file has formatting issues and MatrixReader fails, so the loop below is disabled.
#for link in $MM_MATRICES;
#do
# echo "======================================================================================================"
# matrix=matrices`echo $link | sed 's/ftp:\/\/math.nist.gov\/pub//'`
# unzipped_matrix=`echo $matrix | sed 's/.gz//'`
# if test ! -e $matrix;
# then
# echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first."
# #echo "Matrix $matrix is missing !!! Run the script 'get-matrices' first." >> sparse-matrix-benchmark.log
# else
# gunzip -c ${matrix} > ${unzipped_matrix}
# echo "Benchmarking with the matrix $unzipped_matrix ..."
# export CUDA_PROFILE_LOG=$unzipped_matrix.float.log
# if test x$DEBUG = xyes;
# then
# gdb --args ${BENCHMARK_DBG} --input-file $unzipped_matrix --log-file sparse-matrix-benchmark.log --verbose 1
# else
# $BENCHMARK --input-file $unzipped_matrix --log-file sparse-matrix-benchmark.log --verbose 1
# fi
# #perl $PROCESS_CUDA_PROFILE $unzipped_matrix.float.log sparse-matrix-profiling-float.log
# fi
#done
for link in $FLORIDA_MM_MATRICES;
do
......@@ -51,17 +52,23 @@ do
cd $DIRNAME
tar zxvf $FILENAME
cd $IWD
if [ ! -d "log-files" ];
then
mkdir log-files
fi
SUBDIRNAME=`echo $FILENAME | sed 's/.tar.gz//'`
rm -f $DIRNAME/$SUBDIRNAME/*_b.mtx # these are usually in array format
for file in $DIRNAME/$SUBDIRNAME/*.mtx;
do
echo "======================================================================================================"
echo "Benchmarking with the matrix $file ..."
mtx_file_name=`basename $file`
mtx_file_name=${mtx_file_name%.mtx}
if test x$DEBUG = xyes;
then
gdb --args $BENCHMARK --test mtx --input-file $file --pdf-file $file.pdf --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
gdb --args $BENCHMARK --input-file $file --log-file log-files/sparse-matrix-benchmark.log --output-mode append --verbose 1
else
$BENCHMARK --test mtx --input-file $file --pdf-file $file.pdf --log-file sparse-matrix-benchmark.log --stop-time $STOP_TIME --verbose 1
$BENCHMARK --input-file $file --log-file log-files/sparse-matrix-benchmark.log --output-mode append --verbose 1
fi
done
fi
......
......@@ -150,7 +150,10 @@ public:
{
// CUDA does not have a native atomic load:
// https://stackoverflow.com/questions/32341081/how-to-have-atomic-load-in-cuda
return const_cast<Atomic*>(this)->fetch_add( 0 );
// const-cast on pointer fails in CUDA 10.1.105
// return const_cast<Atomic*>(this)->fetch_add( 0 );
return const_cast<Atomic&>(*this).fetch_add( 0 );
}
__cuda_callable__
......
......@@ -13,6 +13,9 @@
#include <type_traits>
#include <string.h>
//To be deleted
#include <iostream>
#include <TNL/Containers/Algorithms/ArrayOperations.h>
#include <TNL/Containers/Algorithms/Reduction.h>
#include <TNL/Containers/Algorithms/ReductionOperations.h>
......@@ -26,13 +29,21 @@ void
ArrayOperations< Devices::Host >::
allocateMemory( Element*& data,
const Index size )
{
data = new Element[ size ];
// According to the standard, new either throws, or returns non-nullptr.
// Some (old) compilers don't comply:
// https://stackoverflow.com/questions/550451/will-new-return-null-in-any-case
TNL_ASSERT_TRUE( data, "Operator 'new' returned a nullptr. This should never happen - there is "
"either a bug or the compiler does not comply to the standard." );
{
if ( Devices::SystemInfo::getFreeMemory() < ( size_t ) size * sizeof( Element ) )
{
data = 0;
throw std::bad_alloc();
}
else
{
data = new Element[ size ];
// According to the standard, new either throws, or returns non-nullptr.
// Some (old) compilers don't comply:
// https://stackoverflow.com/questions/550451/will-new-return-null-in-any-case
TNL_ASSERT_TRUE( data, "Operator 'new' returned a nullptr. This should never happen - there is "
"either a bug or the compiler does not comply to the standard." );
}
}
template< typename Element >
......
......@@ -23,7 +23,7 @@ namespace Devices {
// Returns the name of this device type as a String.
// NOTE(review): two return statements follow. Only the first one ("Cuda") can
// execute; the second ("Devices::Cuda") is unreachable. They look like the old
// and the new side of a diff - confirm which name is intended and drop the other.
inline String Cuda::getDeviceType()
{
return String( "Cuda" );
return String( "Devices::Cuda" );
}
inline void
......
......@@ -27,56 +27,95 @@ namespace Matrices {
template< typename Device >
class AdEllpackDeviceDependentCode;
// Node of the doubly linked list kept by warpList: besides its payload it
// stores pointers to the next and the previous node.
// NOTE(review): every data member below appears twice, once with plain int /
// untemplated warpInfo* and once with IndexType / warpInfo< MatrixType >*.
// This looks like the old and the new side of a diff; only one set can stay.
template< typename MatrixType >
struct warpInfo
{
// Payload with int types; the same names are passed to warpList::addWarp.
// Presumably describes one warp's portion of the matrix - TODO confirm the exact meaning.
int offset;
int rowOffset;
int localLoad;
int reduceMap[ 32 ];
// List links.
warpInfo* next;
warpInfo* previous;
// Types taken over from the matrix the list is built for.
using RealType = typename MatrixType::RealType;
using DeviceType = typename MatrixType::DeviceType;
using IndexType = typename MatrixType::IndexType;
// Payload expressed in the matrix index type.
IndexType offset;
IndexType rowOffset;
IndexType localLoad;
IndexType reduceMap[ 32 ];
// List links.
warpInfo< MatrixType >* next;
warpInfo< MatrixType >* previous;
};
template< typename MatrixType >
class warpList
{
public:
using RealType = typename MatrixType::RealType;
using DeviceType = typename MatrixType::DeviceType;
using IndexType = typename MatrixType::IndexType;
warpList();
bool addWarp( const int offset,
const int rowOffset,
const int localLoad,
const int* reduceMap );
bool addWarp( const IndexType offset,
const IndexType rowOffset,
const IndexType localLoad,
const IndexType* reduceMap );
warpInfo* splitInHalf( warpInfo* warp );
warpInfo< MatrixType >* splitInHalf( warpInfo< MatrixType >* warp );
int getNumberOfWarps()
IndexType getNumberOfWarps()
{ return this->numberOfWarps; }
warpInfo* getNextWarp( warpInfo* warp )
warpInfo< MatrixType >* getNextWarp( warpInfo< MatrixType >* warp )
{ return warp->next; }
warpInfo* getHead()
warpInfo< MatrixType >* getHead()
{ return this->head; }
warpInfo* getTail()
warpInfo< MatrixType >* getTail()
{ return this->tail; }
~warpList();
void printList()
{
if( this->getHead() == this->getTail() )
std::cout << "HEAD==TAIL" << std::endl;
else
{
// TEST
for( warpInfo< MatrixType >* i = this->getHead(); i != this->getTail()->next; i = i->next )
{
if( i == this->getHead() );
// std::cout << "Head:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
else if( i == this->getTail() );
// std::cout << "Tail:" << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
else
std::cout << "\ti->localLoad = " << i->localLoad << "\ti->offset = " << i->offset << "\ti->rowOffset = " << i->rowOffset << std::endl;
}
std::cout << std::endl;
}
}
private:
int numberOfWarps;
IndexType numberOfWarps;
warpInfo* head;
warpInfo* tail;
warpInfo< MatrixType >* head;
warpInfo< MatrixType >* tail;
};
template< typename Real, typename Device, typename Index >
class AdEllpack : public Sparse< Real, Device, Index >
{
private:
// convenient template alias for controlling the selection of copy-assignment operator
template< typename Device2 >
using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
// friend class will be needed for templated assignment operators
template< typename Real2, typename Device2, typename Index2 >
friend class AdEllpack;
public:
typedef Real RealType;
......@@ -104,9 +143,15 @@ public:
IndexType getRowLength( const IndexType row ) const;
template< typename Real2, typename Device2, typename Index2 >
bool setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix );
void reset();
template< typename Real2, typename Device2, typename Index2 >
bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
template< typename Real2, typename Device2, typename Index2 >
bool operator != ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const;
bool setElement( const IndexType row,
const IndexType column,
......@@ -144,7 +189,15 @@ public:
typename OutVector >
void vectorProduct( const InVector& inVector,
OutVector& outVector ) const;
// copy assignment
AdEllpack& operator=( const AdEllpack& matrix );
// cross-device copy assignment
template< typename Real2, typename Device2, typename Index2,
typename = typename Enabler< Device2 >::type >
AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
bool save( File& file ) const;
bool load( File& file );
......@@ -157,13 +210,13 @@ public:
bool balanceLoad( const RealType average,
ConstCompressedRowLengthsVectorView rowLengths,
warpList* list );
warpList< ThisType >* list );
void computeWarps( const IndexType SMs,
const IndexType threadsPerSM,
warpList* list );
warpList< ThisType >* list );
bool createArrays( warpList* list );
bool createArrays( warpList< ThisType >* list );
void performRowTest();
......
This diff is collapsed.
......@@ -28,9 +28,19 @@ namespace TNL {
template< typename Device >
class BiEllpackDeviceDependentCode;
template< typename Real, typename Device = Devices::Cuda, typename Index = int, int StripSize = 32 >
template< typename Real, typename Device, typename Index >
class BiEllpack : public Sparse< Real, Device, Index >
{
private:
// convenient template alias for controlling the selection of copy-assignment operator
template< typename Device2 >
using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;
// friend class will be needed for templated assignment operators
template< typename Real2, typename Device2, typename Index2 >
friend class BiEllpack;
public:
typedef Real RealType;
typedef Device DeviceType;
......@@ -59,7 +69,15 @@ public:
template< typename Real2,
typename Device2,
typename Index2 >
bool setLike( const BiEllpack< Real2, Device2, Index2, StripSize >& matrix );
void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix );
void reset();
template< typename Real2, typename Device2, typename Index2 >
bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
template< typename Real2, typename Device2, typename Index2 >
bool operator != ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const;
void getRowLengths( CompressedRowLengthsVector& rowLengths ) const;
......@@ -126,8 +144,14 @@ public:
IndexType getNumberOfGroups( const IndexType row ) const;
bool vectorProductTest() const;
// copy assignment
BiEllpack& operator=( const BiEllpack& matrix );
void reset();
// cross-device copy assignment
template< typename Real2, typename Device2, typename Index2,
typename = typename Enabler< Device2 >::type >
BiEllpack& operator=( const BiEllpack< Real2, Device2, Index2 >& matrix );
bool save( File& file ) const;
......@@ -138,11 +162,13 @@ public:
bool load( const String& fileName );
void print( std::ostream& str ) const;
void printValues() const;
void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths );
void computeColumnSizes( Containers::Vector< Index, Device, Index >& tempRowLengths );
// void verifyRowLengths( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths );
// void verifyRowLengths( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths );
template< typename InVector,
typename OutVector >
......@@ -159,11 +185,11 @@ public:
IndexType getStripLength( const IndexType strip ) const;
__cuda_callable__
void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
void performRowBubbleSortCudaKernel( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths,
const IndexType strip );
__cuda_callable__
void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index, StripSize >::CompressedRowLengthsVector& rowLengths,
void computeColumnSizesCudaKernel( const typename BiEllpack< Real, Device, Index >::CompressedRowLengthsVector& rowLengths,
const IndexType numberOfStrips,
const IndexType strip );
......@@ -173,6 +199,8 @@ public:
typedef BiEllpackDeviceDependentCode< DeviceType > DeviceDependentCode;
friend class BiEllpackDeviceDependentCode< DeviceType >;
friend class BiEllpack< RealType, Devices::Host, IndexType >;
friend class BiEllpack< RealType, Devices::Cuda, IndexType >;
private:
......
......@@ -51,10 +51,12 @@ template< typename Real,
int StripSize >
String BiEllpackSymmetric< Real, Device, Index, StripSize >::getType()
{
return String( "BiEllpackMatrix< ") +
return String( "Matrices::BiEllpackMatrix< ") +
String( TNL::getType< Real >() ) +
String( ", " ) +
Device :: getDeviceType() +
String( Device :: getDeviceType() ) +
String( ", " ) +
String( TNL::getType< Index >() ) +
String( " >" );
}
......
This diff is collapsed.
......@@ -32,11 +32,13 @@ template< typename Real,
typename Index >
String COOMatrix< Real, Device, Index >::getType()
{
return String("COOMatrix< ") +
String(TNL::getType< Real>()) +
String(", ") +
Device::getDeviceType() +
String(" >");
return String( "Matrices::COOMatrix< " ) +
String( TNL::getType< Real>() ) +
String( ", " ) +
String( Device :: getDeviceType() ) +
String( ", " ) +
String( TNL::getType< Index >() ) +
String( " >" );
}
template< typename Real,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.