Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tnl-dev
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
TNL
tnl-dev
Commits
2ebb1334
There was an error fetching the commit references. Please try again later.
Commit
2ebb1334
authored
6 years ago
by
Lukas Cejka
Committed by
Tomáš Oberhuber
5 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Implemented rough version of SpMV Benchmark for mtx files.
parent
aa627012
No related branches found
No related tags found
1 merge request
!45
Matrices revision
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/Benchmarks/SpMV/spmv.h
+75
-105
75 additions, 105 deletions
src/Benchmarks/SpMV/spmv.h
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+10
-31
10 additions, 31 deletions
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
with
85 additions
and
136 deletions
src/Benchmarks/SpMV/spmv.h
+
75
−
105
View file @
2ebb1334
...
@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
...
@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
using
SlicedEllpack
=
Matrices
::
SlicedEllpack
<
Real
,
Device
,
Index
>
;
using
SlicedEllpack
=
Matrices
::
SlicedEllpack
<
Real
,
Device
,
Index
>
;
template
<
typename
Matrix
>
template
<
typename
Matrix
>
int
setHostTestMatrix
(
Matrix
&
matrix
,
void
printMatrixInfo
(
const
String
&
inputFileName
,
const
int
elementsPerRow
)
const
Matrix
&
matrix
,
std
::
ostream
&
str
)
{
{
const
int
size
=
matrix
.
getRows
();
// Get only the name of the format from getType().
int
elements
(
0
);
std
::
string
mtrxFullType
=
matrix
.
getType
();
for
(
int
row
=
0
;
row
<
size
;
row
++
)
{
std
::
string
mtrxType
=
mtrxFullType
.
substr
(
0
,
mtrxFullType
.
find
(
"<"
));
int
col
=
row
-
elementsPerRow
/
2
;
std
::
string
type
=
mtrxType
.
substr
(
mtrxType
.
find
(
':'
)
+
2
);
for
(
int
element
=
0
;
element
<
elementsPerRow
;
element
++
)
{
if
(
col
+
element
>=
0
&&
str
<<
"
\n
Format: "
<<
type
<<
std
::
endl
;
col
+
element
<
size
)
str
<<
" Rows: "
<<
matrix
.
getRows
()
<<
std
::
endl
;
{
str
<<
" Cols: "
<<
matrix
.
getColumns
()
<<
std
::
endl
;
matrix
.
setElement
(
row
,
col
+
element
,
element
+
1
);
str
<<
" Nonzero Elements: "
<<
matrix
.
getNumberOfNonzeroMatrixElements
()
<<
std
::
endl
;
elements
++
;
}
}
}
return
elements
;
}
#ifdef HAVE_CUDA
template
<
typename
Matrix
>
__global__
void
setCudaTestMatrixKernel
(
Matrix
*
matrix
,
const
int
elementsPerRow
,
const
int
gridIdx
)
{
const
int
rowIdx
=
(
gridIdx
*
Devices
::
Cuda
::
getMaxGridSize
()
+
blockIdx
.
x
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
rowIdx
>=
matrix
->
getRows
()
)
return
;
int
col
=
rowIdx
-
elementsPerRow
/
2
;
for
(
int
element
=
0
;
element
<
elementsPerRow
;
element
++
)
{
if
(
col
+
element
>=
0
&&
col
+
element
<
matrix
->
getColumns
()
)
matrix
->
setElementFast
(
rowIdx
,
col
+
element
,
element
+
1
);
}
}
#endif
template
<
typename
Matrix
>
void
setCudaTestMatrix
(
Matrix
&
matrix
,
const
int
elementsPerRow
)
{
#ifdef HAVE_CUDA
typedef
typename
Matrix
::
IndexType
IndexType
;
typedef
typename
Matrix
::
RealType
RealType
;
Pointers
::
DevicePointer
<
Matrix
>
kernel_matrix
(
matrix
);
dim3
cudaBlockSize
(
256
),
cudaGridSize
(
Devices
::
Cuda
::
getMaxGridSize
()
);
const
IndexType
cudaBlocks
=
roundUpDivision
(
matrix
.
getRows
(),
cudaBlockSize
.
x
);
const
IndexType
cudaGrids
=
roundUpDivision
(
cudaBlocks
,
Devices
::
Cuda
::
getMaxGridSize
()
);
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
if
(
gridIdx
==
cudaGrids
-
1
)
cudaGridSize
.
x
=
cudaBlocks
%
Devices
::
Cuda
::
getMaxGridSize
();
setCudaTestMatrixKernel
<
Matrix
>
<<<
cudaGridSize
,
cudaBlockSize
>>>
(
&
kernel_matrix
.
template
modifyData
<
Devices
::
Cuda
>(),
elementsPerRow
,
gridIdx
);
TNL_CHECK_CUDA_DEVICE
;
}
#endif
}
}
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template
<
typename
Real
,
template
<
typename
Real
,
template
<
typename
,
typename
,
typename
>
class
Matrix
,
template
<
typename
,
typename
,
typename
>
class
Matrix
,
...
@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
...
@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
HostVector
hostVector
,
hostVector2
;
HostVector
hostVector
,
hostVector2
;
CudaVector
deviceVector
,
deviceVector2
;
CudaVector
deviceVector
,
deviceVector2
;
if
(
!
MatrixReader
<
HostMatrix
>::
readMtxFile
(
inputFileName
,
hostMatrix
)
)
try
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
{
else
if
(
!
MatrixReader
<
HostMatrix
>::
readMtxFile
(
inputFileName
,
hostMatrix
)
)
{
{
#ifdef HAVE_CUDA
if
(
!
MatrixReader
<
DeviceMatrix
>::
readMtxFile
(
inputFileName
,
deviceMatrix
)
)
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
#endif
return
false
;
}
hostVector
.
setSize
(
hostMatrix
.
getColumns
()
);
}
hostVector2
.
setSize
(
hostMatrix
.
getRows
()
);
catch
(
std
::
bad_alloc
)
{
#ifdef HAVE_CUDA
std
::
cerr
<<
"Not enough memory to read the matrix."
<<
std
::
endl
;
deviceVector
.
setSize
(
deviceMatrix
.
getColumns
()
);
return
false
;
deviceVector2
.
setSize
(
deviceMatrix
.
getRows
()
);
}
#endif
printMatrixInfo
(
inputFileName
,
hostMatrix
,
std
::
cout
);
#ifdef HAVE_CUDA
// reset function
// FIXME: This doesn't work for ChunkedEllpack, because
auto
reset
=
[
&
]()
{
// its cross-device assignment is not implemented yet.
hostVector
.
setValue
(
1.0
);
deviceMatrix
=
hostMatrix
;
hostVector2
.
setValue
(
0.0
);
#endif
#ifdef HAVE_CUDA
deviceVector
.
setValue
(
1.0
);
benchmark
.
setMetadataColumns
(
Benchmark
::
MetadataColumns
({
deviceVector2
.
setValue
(
0.0
);
{
"rows"
,
convertToString
(
hostMatrix
.
getRows
()
)
},
#endif
{
"columns"
,
convertToString
(
hostMatrix
.
getColumns
()
)
}
};
}
));
const
int
elements
=
hostMatrix
.
getNumberOfNonzeroMatrixElements
();
hostVector
.
setSize
(
hostMatrix
.
getColumns
()
);
hostVector2
.
setSize
(
hostMatrix
.
getRows
()
);
const
double
datasetSize
=
(
double
)
elements
*
(
2
*
sizeof
(
Real
)
+
sizeof
(
int
)
)
/
oneGB
;
#ifdef HAVE_CUDA
// compute functions
deviceVector
.
setSize
(
hostMatrix
.
getColumns
()
);
auto
spmvHost
=
[
&
]()
{
deviceVector2
.
setSize
(
hostMatrix
.
getRows
()
);
hostMatrix
.
vectorProduct
(
hostVector
,
hostVector2
);
#endif
};
auto
spmvCuda
=
[
&
]()
{
// reset function
deviceMatrix
.
vectorProduct
(
deviceVector
,
deviceVector2
);
auto
reset
=
[
&
]()
{
};
hostVector
.
setValue
(
1.0
);
hostVector2
.
setValue
(
0.0
);
benchmark
.
setOperation
(
datasetSize
);
#ifdef HAVE_CUDA
benchmark
.
time
<
Devices
::
Host
>
(
reset
,
"CPU"
,
spmvHost
);
deviceVector
.
setValue
(
1.0
);
#ifdef HAVE_CUDA
deviceVector2
.
setValue
(
0.0
);
benchmark
.
time
<
Devices
::
Cuda
>
(
reset
,
"GPU"
,
spmvCuda
);
#endif
#endif
};
return
true
;
}
const
int
elements
=
hostMatrix
.
getNumberOfNonzeroMatrixElements
();
const
double
datasetSize
=
(
double
)
elements
*
(
2
*
sizeof
(
Real
)
+
sizeof
(
int
)
)
/
oneGB
;
// compute functions
auto
spmvHost
=
[
&
]()
{
hostMatrix
.
vectorProduct
(
hostVector
,
hostVector2
);
};
auto
spmvCuda
=
[
&
]()
{
deviceMatrix
.
vectorProduct
(
deviceVector
,
deviceVector2
);
};
benchmark
.
setOperation
(
datasetSize
);
benchmark
.
time
<
Devices
::
Host
>
(
reset
,
"CPU"
,
spmvHost
);
#ifdef HAVE_CUDA
benchmark
.
time
<
Devices
::
Cuda
>
(
reset
,
"GPU"
,
spmvCuda
);
#endif
return
true
;
}
}
template
<
typename
Real
=
double
,
template
<
typename
Real
=
double
,
...
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
...
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool
result
=
true
;
bool
result
=
true
;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result
|=
benchmarkSpMV
<
Real
,
Matrices
::
CSR
>
(
benchmark
,
inputFileName
);
result
|=
benchmarkSpMV
<
Real
,
Matrices
::
CSR
>
(
benchmark
,
inputFileName
);
//
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark,
size, elementsPerRow
);
result
|=
benchmarkSpMV
<
Real
,
Matrices
::
Ellpack
>
(
benchmark
,
inputFileName
);
//
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark,
size, elementsPerRow
);
result
|=
benchmarkSpMV
<
Real
,
SlicedEllpack
>
(
benchmark
,
inputFileName
);
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark,
size, elementsPerRow
);
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark,
inputFileName
);
return
result
;
return
result
;
}
}
...
...
This diff is collapsed.
Click to expand it.
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+
10
−
31
View file @
2ebb1334
...
@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
...
@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
Benchmark
::
MetadataMap
metadata
,
Benchmark
::
MetadataMap
metadata
,
const
String
&
inputFileName
)
const
String
&
inputFileName
)
{
{
// DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)
const
String
precision
=
getType
<
Real
>
();
metadata
[
"precision"
]
=
precision
;
typedef
Matrices
::
CSR
<
Real
,
Devices
::
Host
,
int
>
CSRType
;
CSRType
csrMatrix
;
// Sparse matrix-vector multiplication
benchmark
.
newBenchmark
(
String
(
"Sparse matrix-vector multiplication ("
)
+
precision
+
")"
,
if
(
!
MatrixReader
<
CSRType
>::
readMtxFile
(
inputFileName
,
csrMatrix
)
)
metadata
);
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
benchmarkSpmvSynthetic
<
Real
>
(
benchmark
,
inputFileName
);
else
{
const
std
::
size_t
rows
=
csrMatrix
.
getRows
();
const
std
::
size_t
cols
=
csrMatrix
.
getColumns
();
const
String
precision
=
getType
<
Real
>
();
metadata
[
"precision"
]
=
precision
;
// Sparse matrix-vector multiplication
benchmark
.
newBenchmark
(
String
(
"Sparse matrix-vector multiplication ("
)
+
precision
+
")"
,
metadata
);
benchmark
.
setMetadataColumns
(
Benchmark
::
MetadataColumns
({
{
"rows"
,
convertToString
(
rows
)
},
{
"columns"
,
convertToString
(
cols
)
}
}
));
benchmarkSpmvSynthetic
<
Real
>
(
benchmark
,
inputFileName
);
}
}
}
void
void
...
@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
...
@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
{
{
config
.
addDelimiter
(
"Benchmark settings:"
);
config
.
addDelimiter
(
"Benchmark settings:"
);
config
.
addRequiredEntry
<
String
>
(
"input-file"
,
"Input file name."
);
config
.
addRequiredEntry
<
String
>
(
"input-file"
,
"Input file name."
);
config
.
addEntry
<
String
>
(
"log-file"
,
"Log file name."
,
"tnl-benchmark-
blas
.log"
);
config
.
addEntry
<
String
>
(
"log-file"
,
"Log file name."
,
"tnl-benchmark-
spmv
.log"
);
config
.
addEntry
<
String
>
(
"output-mode"
,
"Mode for opening the log file."
,
"overwrite"
);
config
.
addEntry
<
String
>
(
"output-mode"
,
"Mode for opening the log file."
,
"overwrite"
);
config
.
addEntryEnum
(
"append"
);
config
.
addEntryEnum
(
"append"
);
config
.
addEntryEnum
(
"overwrite"
);
config
.
addEntryEnum
(
"overwrite"
);
config
.
addEntry
<
String
>
(
"precision"
,
"Precision of the arithmetics."
,
"
double
"
);
config
.
addEntry
<
String
>
(
"precision"
,
"Precision of the arithmetics."
,
"
all
"
);
config
.
addEntryEnum
(
"float"
);
config
.
addEntryEnum
(
"float"
);
config
.
addEntryEnum
(
"double"
);
config
.
addEntryEnum
(
"double"
);
config
.
addEntryEnum
(
"all"
);
config
.
addEntryEnum
(
"all"
);
...
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
...
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
const
String
&
logFileName
=
parameters
.
getParameter
<
String
>
(
"log-file"
);
const
String
&
logFileName
=
parameters
.
getParameter
<
String
>
(
"log-file"
);
const
String
&
outputMode
=
parameters
.
getParameter
<
String
>
(
"output-mode"
);
const
String
&
outputMode
=
parameters
.
getParameter
<
String
>
(
"output-mode"
);
const
String
&
precision
=
parameters
.
getParameter
<
String
>
(
"precision"
);
const
String
&
precision
=
parameters
.
getParameter
<
String
>
(
"precision"
);
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const
int
loops
=
parameters
.
getParameter
<
int
>
(
"loops"
);
const
int
loops
=
parameters
.
getParameter
<
int
>
(
"loops"
);
const
int
verbose
=
parameters
.
getParameter
<
int
>
(
"verbose"
);
const
int
verbose
=
parameters
.
getParameter
<
int
>
(
"verbose"
);
...
@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
...
@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
return
EXIT_FAILURE
;
return
EXIT_FAILURE
;
}
}
std
::
cout
<<
"== BENCHMARK FINISHED =="
<<
std
::
endl
;
std
::
cout
<<
"
\n
== BENCHMARK FINISHED =="
<<
std
::
endl
;
return
EXIT_SUCCESS
;
return
EXIT_SUCCESS
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment