Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tnl-dev
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
TNL
tnl-dev
Commits
2ebb1334
There was an error fetching the commit references. Please try again later.
Commit
2ebb1334
authored
6 years ago
by
Lukas Cejka
Committed by
Tomáš Oberhuber
5 years ago
Browse files
Options
Downloads
Patches
Plain Diff
Implemented rought version of SpMV Benchmark for mtx files.
parent
aa627012
No related branches found
No related tags found
1 merge request
!45
Matrices revision
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/Benchmarks/SpMV/spmv.h
+75
-105
75 additions, 105 deletions
src/Benchmarks/SpMV/spmv.h
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+10
-31
10 additions, 31 deletions
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
with
85 additions
and
136 deletions
src/Benchmarks/SpMV/spmv.h
+
75
−
105
View file @
2ebb1334
...
...
@@ -31,66 +31,21 @@ template< typename Real, typename Device, typename Index >
using
SlicedEllpack
=
Matrices
::
SlicedEllpack
<
Real
,
Device
,
Index
>
;
template
<
typename
Matrix
>
int
setHostTestMatrix
(
Matrix
&
matrix
,
const
int
elementsPerRow
)
void
printMatrixInfo
(
const
String
&
inputFileName
,
const
Matrix
&
matrix
,
std
::
ostream
&
str
)
{
const
int
size
=
matrix
.
getRows
();
int
elements
(
0
);
for
(
int
row
=
0
;
row
<
size
;
row
++
)
{
int
col
=
row
-
elementsPerRow
/
2
;
for
(
int
element
=
0
;
element
<
elementsPerRow
;
element
++
)
{
if
(
col
+
element
>=
0
&&
col
+
element
<
size
)
{
matrix
.
setElement
(
row
,
col
+
element
,
element
+
1
);
elements
++
;
}
}
}
return
elements
;
}
#ifdef HAVE_CUDA
template
<
typename
Matrix
>
__global__
void
setCudaTestMatrixKernel
(
Matrix
*
matrix
,
const
int
elementsPerRow
,
const
int
gridIdx
)
{
const
int
rowIdx
=
(
gridIdx
*
Devices
::
Cuda
::
getMaxGridSize
()
+
blockIdx
.
x
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
rowIdx
>=
matrix
->
getRows
()
)
return
;
int
col
=
rowIdx
-
elementsPerRow
/
2
;
for
(
int
element
=
0
;
element
<
elementsPerRow
;
element
++
)
{
if
(
col
+
element
>=
0
&&
col
+
element
<
matrix
->
getColumns
()
)
matrix
->
setElementFast
(
rowIdx
,
col
+
element
,
element
+
1
);
}
}
#endif
template
<
typename
Matrix
>
void
setCudaTestMatrix
(
Matrix
&
matrix
,
const
int
elementsPerRow
)
{
#ifdef HAVE_CUDA
typedef
typename
Matrix
::
IndexType
IndexType
;
typedef
typename
Matrix
::
RealType
RealType
;
Pointers
::
DevicePointer
<
Matrix
>
kernel_matrix
(
matrix
);
dim3
cudaBlockSize
(
256
),
cudaGridSize
(
Devices
::
Cuda
::
getMaxGridSize
()
);
const
IndexType
cudaBlocks
=
roundUpDivision
(
matrix
.
getRows
(),
cudaBlockSize
.
x
);
const
IndexType
cudaGrids
=
roundUpDivision
(
cudaBlocks
,
Devices
::
Cuda
::
getMaxGridSize
()
);
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
if
(
gridIdx
==
cudaGrids
-
1
)
cudaGridSize
.
x
=
cudaBlocks
%
Devices
::
Cuda
::
getMaxGridSize
();
setCudaTestMatrixKernel
<
Matrix
>
<<<
cudaGridSize
,
cudaBlockSize
>>>
(
&
kernel_matrix
.
template
modifyData
<
Devices
::
Cuda
>(),
elementsPerRow
,
gridIdx
);
TNL_CHECK_CUDA_DEVICE
;
}
#endif
// Get only the name of the format from getType().
std
::
string
mtrxFullType
=
matrix
.
getType
();
std
::
string
mtrxType
=
mtrxFullType
.
substr
(
0
,
mtrxFullType
.
find
(
"<"
));
std
::
string
type
=
mtrxType
.
substr
(
mtrxType
.
find
(
':'
)
+
2
);
str
<<
"
\n
Format: "
<<
type
<<
std
::
endl
;
str
<<
" Rows: "
<<
matrix
.
getRows
()
<<
std
::
endl
;
str
<<
" Cols: "
<<
matrix
.
getColumns
()
<<
std
::
endl
;
str
<<
" Nonzero Elements: "
<<
matrix
.
getNumberOfNonzeroMatrixElements
()
<<
std
::
endl
;
}
// TODO: rename as benchmark_SpMV_synthetic and move to spmv-synthetic.h
template
<
typename
Real
,
template
<
typename
,
typename
,
typename
>
class
Matrix
,
...
...
@@ -109,52 +64,67 @@ benchmarkSpMV( Benchmark & benchmark,
HostVector
hostVector
,
hostVector2
;
CudaVector
deviceVector
,
deviceVector2
;
if
(
!
MatrixReader
<
HostMatrix
>::
readMtxFile
(
inputFileName
,
hostMatrix
)
)
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
else
{
#ifdef HAVE_CUDA
if
(
!
MatrixReader
<
DeviceMatrix
>::
readMtxFile
(
inputFileName
,
deviceMatrix
)
)
try
{
if
(
!
MatrixReader
<
HostMatrix
>::
readMtxFile
(
inputFileName
,
hostMatrix
)
)
{
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
#endif
hostVector
.
setSize
(
hostMatrix
.
getColumns
()
);
hostVector2
.
setSize
(
hostMatrix
.
getRows
()
);
#ifdef HAVE_CUDA
deviceVector
.
setSize
(
deviceMatrix
.
getColumns
()
);
deviceVector2
.
setSize
(
deviceMatrix
.
getRows
()
);
#endif
// reset function
auto
reset
=
[
&
]()
{
hostVector
.
setValue
(
1.0
);
hostVector2
.
setValue
(
0.0
);
#ifdef HAVE_CUDA
deviceVector
.
setValue
(
1.0
);
deviceVector2
.
setValue
(
0.0
);
#endif
};
const
int
elements
=
hostMatrix
.
getNumberOfNonzeroMatrixElements
();
const
double
datasetSize
=
(
double
)
elements
*
(
2
*
sizeof
(
Real
)
+
sizeof
(
int
)
)
/
oneGB
;
// compute functions
auto
spmvHost
=
[
&
]()
{
hostMatrix
.
vectorProduct
(
hostVector
,
hostVector2
);
};
auto
spmvCuda
=
[
&
]()
{
deviceMatrix
.
vectorProduct
(
deviceVector
,
deviceVector2
);
};
benchmark
.
setOperation
(
datasetSize
);
benchmark
.
time
<
Devices
::
Host
>
(
reset
,
"CPU"
,
spmvHost
);
#ifdef HAVE_CUDA
benchmark
.
time
<
Devices
::
Cuda
>
(
reset
,
"GPU"
,
spmvCuda
);
#endif
return
true
;
}
return
false
;
}
}
catch
(
std
::
bad_alloc
)
{
std
::
cerr
<<
"Not enough memory to read the matrix."
<<
std
::
endl
;
return
false
;
}
printMatrixInfo
(
inputFileName
,
hostMatrix
,
std
::
cout
);
#ifdef HAVE_CUDA
// FIXME: This doesn't work for ChunkedEllpack, because
// its cross-device assignment is not implemented yet.
deviceMatrix
=
hostMatrix
;
#endif
benchmark
.
setMetadataColumns
(
Benchmark
::
MetadataColumns
({
{
"rows"
,
convertToString
(
hostMatrix
.
getRows
()
)
},
{
"columns"
,
convertToString
(
hostMatrix
.
getColumns
()
)
}
}
));
hostVector
.
setSize
(
hostMatrix
.
getColumns
()
);
hostVector2
.
setSize
(
hostMatrix
.
getRows
()
);
#ifdef HAVE_CUDA
deviceVector
.
setSize
(
hostMatrix
.
getColumns
()
);
deviceVector2
.
setSize
(
hostMatrix
.
getRows
()
);
#endif
// reset function
auto
reset
=
[
&
]()
{
hostVector
.
setValue
(
1.0
);
hostVector2
.
setValue
(
0.0
);
#ifdef HAVE_CUDA
deviceVector
.
setValue
(
1.0
);
deviceVector2
.
setValue
(
0.0
);
#endif
};
const
int
elements
=
hostMatrix
.
getNumberOfNonzeroMatrixElements
();
const
double
datasetSize
=
(
double
)
elements
*
(
2
*
sizeof
(
Real
)
+
sizeof
(
int
)
)
/
oneGB
;
// compute functions
auto
spmvHost
=
[
&
]()
{
hostMatrix
.
vectorProduct
(
hostVector
,
hostVector2
);
};
auto
spmvCuda
=
[
&
]()
{
deviceMatrix
.
vectorProduct
(
deviceVector
,
deviceVector2
);
};
benchmark
.
setOperation
(
datasetSize
);
benchmark
.
time
<
Devices
::
Host
>
(
reset
,
"CPU"
,
spmvHost
);
#ifdef HAVE_CUDA
benchmark
.
time
<
Devices
::
Cuda
>
(
reset
,
"GPU"
,
spmvCuda
);
#endif
return
true
;
}
template
<
typename
Real
=
double
,
...
...
@@ -166,9 +136,9 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
bool
result
=
true
;
// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
result
|=
benchmarkSpMV
<
Real
,
Matrices
::
CSR
>
(
benchmark
,
inputFileName
);
//
result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark,
size, elementsPerRow
);
//
result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark,
size, elementsPerRow
);
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark,
size, elementsPerRow
);
result
|=
benchmarkSpMV
<
Real
,
Matrices
::
Ellpack
>
(
benchmark
,
inputFileName
);
result
|=
benchmarkSpMV
<
Real
,
SlicedEllpack
>
(
benchmark
,
inputFileName
);
// result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark,
inputFileName
);
return
result
;
}
...
...
This diff is collapsed.
Click to expand it.
src/Benchmarks/SpMV/tnl-benchmark-spmv.h
+
10
−
31
View file @
2ebb1334
...
...
@@ -43,29 +43,13 @@ runSpMVBenchmarks( Benchmark & benchmark,
Benchmark
::
MetadataMap
metadata
,
const
String
&
inputFileName
)
{
// DO: get rows and cols from inputFileName (/TNL/Matrices/MatrixReader_impl.h)
typedef
Matrices
::
CSR
<
Real
,
Devices
::
Host
,
int
>
CSRType
;
CSRType
csrMatrix
;
if
(
!
MatrixReader
<
CSRType
>::
readMtxFile
(
inputFileName
,
csrMatrix
)
)
std
::
cerr
<<
"I am not able to read the matrix file "
<<
inputFileName
<<
"."
<<
std
::
endl
;
else
{
const
std
::
size_t
rows
=
csrMatrix
.
getRows
();
const
std
::
size_t
cols
=
csrMatrix
.
getColumns
();
const
String
precision
=
getType
<
Real
>
();
metadata
[
"precision"
]
=
precision
;
// Sparse matrix-vector multiplication
benchmark
.
newBenchmark
(
String
(
"Sparse matrix-vector multiplication ("
)
+
precision
+
")"
,
metadata
);
benchmark
.
setMetadataColumns
(
Benchmark
::
MetadataColumns
({
{
"rows"
,
convertToString
(
rows
)
},
{
"columns"
,
convertToString
(
cols
)
}
}
));
benchmarkSpmvSynthetic
<
Real
>
(
benchmark
,
inputFileName
);
}
const
String
precision
=
getType
<
Real
>
();
metadata
[
"precision"
]
=
precision
;
// Sparse matrix-vector multiplication
benchmark
.
newBenchmark
(
String
(
"Sparse matrix-vector multiplication ("
)
+
precision
+
")"
,
metadata
);
benchmarkSpmvSynthetic
<
Real
>
(
benchmark
,
inputFileName
);
}
void
...
...
@@ -73,11 +57,11 @@ setupConfig( Config::ConfigDescription & config )
{
config
.
addDelimiter
(
"Benchmark settings:"
);
config
.
addRequiredEntry
<
String
>
(
"input-file"
,
"Input file name."
);
config
.
addEntry
<
String
>
(
"log-file"
,
"Log file name."
,
"tnl-benchmark-
blas
.log"
);
config
.
addEntry
<
String
>
(
"log-file"
,
"Log file name."
,
"tnl-benchmark-
spmv
.log"
);
config
.
addEntry
<
String
>
(
"output-mode"
,
"Mode for opening the log file."
,
"overwrite"
);
config
.
addEntryEnum
(
"append"
);
config
.
addEntryEnum
(
"overwrite"
);
config
.
addEntry
<
String
>
(
"precision"
,
"Precision of the arithmetics."
,
"
double
"
);
config
.
addEntry
<
String
>
(
"precision"
,
"Precision of the arithmetics."
,
"
all
"
);
config
.
addEntryEnum
(
"float"
);
config
.
addEntryEnum
(
"double"
);
config
.
addEntryEnum
(
"all"
);
...
...
@@ -110,11 +94,6 @@ main( int argc, char* argv[] )
const
String
&
logFileName
=
parameters
.
getParameter
<
String
>
(
"log-file"
);
const
String
&
outputMode
=
parameters
.
getParameter
<
String
>
(
"output-mode"
);
const
String
&
precision
=
parameters
.
getParameter
<
String
>
(
"precision"
);
// FIXME: getParameter< std::size_t >() does not work with parameters added with addEntry< int >(),
// which have a default value. The workaround below works for int values, but it is not possible
// to pass 64-bit integer values
// const std::size_t minSize = parameters.getParameter< std::size_t >( "min-size" );
// const std::size_t maxSize = parameters.getParameter< std::size_t >( "max-size" );
const
int
loops
=
parameters
.
getParameter
<
int
>
(
"loops"
);
const
int
verbose
=
parameters
.
getParameter
<
int
>
(
"verbose"
);
...
...
@@ -142,6 +121,6 @@ main( int argc, char* argv[] )
return
EXIT_FAILURE
;
}
std
::
cout
<<
"== BENCHMARK FINISHED =="
<<
std
::
endl
;
std
::
cout
<<
"
\n
== BENCHMARK FINISHED =="
<<
std
::
endl
;
return
EXIT_SUCCESS
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment