Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tnl-dev
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Model registry
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
TNL
tnl-dev
Commits
7398085f
There was an error fetching the commit references. Please try again later.
Commit
7398085f
authored
5 years ago
by
Tomáš Oberhuber
Browse files
Options
Downloads
Patches
Plain Diff
Rewritting the Merson solver using expression templates.
parent
5f87aba3
No related branches found
No related tags found
1 merge request
!34
Runge kutta
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/TNL/Solvers/ODE/Merson.h
+1
-1
1 addition, 1 deletion
src/TNL/Solvers/ODE/Merson.h
src/TNL/Solvers/ODE/Merson_impl.h
+147
-181
147 additions, 181 deletions
src/TNL/Solvers/ODE/Merson_impl.h
with
148 additions
and
182 deletions
src/TNL/Solvers/ODE/Merson.h
+
1
−
1
View file @
7398085f
...
@@ -65,7 +65,7 @@ class Merson : public ExplicitSolver< Problem >
...
@@ -65,7 +65,7 @@ class Merson : public ExplicitSolver< Problem >
void
writeGrids
(
const
DofVectorPointer
&
u
);
void
writeGrids
(
const
DofVectorPointer
&
u
);
DofVectorPointer
k1
,
k2
,
k3
,
k4
,
k5
,
kAux
;
DofVectorPointer
_
k1
,
_
k2
,
_
k3
,
_
k4
,
_
k5
,
_
kAux
;
/****
/****
* This controls the accuracy of the solver
* This controls the accuracy of the solver
...
...
This diff is collapsed.
Click to expand it.
src/TNL/Solvers/ODE/Merson_impl.h
+
147
−
181
View file @
7398085f
...
@@ -128,7 +128,7 @@ void Merson< Problem > :: setAdaptivity( const RealType& a )
...
@@ -128,7 +128,7 @@ void Merson< Problem > :: setAdaptivity( const RealType& a )
};
};
template
<
typename
Problem
>
template
<
typename
Problem
>
bool
Merson
<
Problem
>
::
solve
(
DofVectorPointer
&
u
)
bool
Merson
<
Problem
>::
solve
(
DofVectorPointer
&
_
u
)
{
{
if
(
!
this
->
problem
)
if
(
!
this
->
problem
)
{
{
...
@@ -143,19 +143,25 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
...
@@ -143,19 +143,25 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
/****
/****
* First setup the supporting meshes k1...k5 and kAux.
* First setup the supporting meshes k1...k5 and kAux.
*/
*/
k1
->
setLike
(
*
u
);
_k1
->
setLike
(
*
_u
);
k2
->
setLike
(
*
u
);
_k2
->
setLike
(
*
_u
);
k3
->
setLike
(
*
u
);
_k3
->
setLike
(
*
_u
);
k4
->
setLike
(
*
u
);
_k4
->
setLike
(
*
_u
);
k5
->
setLike
(
*
u
);
_k5
->
setLike
(
*
_u
);
kAux
->
setLike
(
*
u
);
_kAux
->
setLike
(
*
_u
);
k1
->
setValue
(
0.0
);
auto
k1
=
_k1
->
getView
();
k2
->
setValue
(
0.0
);
auto
k2
=
_k2
->
getView
();
k3
->
setValue
(
0.0
);
auto
k3
=
_k3
->
getView
();
k4
->
setValue
(
0.0
);
auto
k4
=
_k4
->
getView
();
k5
->
setValue
(
0.0
);
auto
k5
=
_k5
->
getView
();
kAux
->
setValue
(
0.0
);
auto
kAux
=
_kAux
->
getView
();
auto
u
=
_u
->
getView
();
k1
=
0.0
;
k2
=
0.0
;
k3
=
0.0
;
k4
=
0.0
;
k5
=
0.0
;
kAux
=
0.0
;
/****
/****
* Set necessary parameters
* Set necessary parameters
...
@@ -170,37 +176,69 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
...
@@ -170,37 +176,69 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
this
->
refreshSolverMonitor
();
this
->
refreshSolverMonitor
();
/****
/////
* Start the main loop
// Start the main loop
*/
while
(
this
->
checkNextIteration
()
)
while
(
this
->
checkNextIteration
()
)
{
{
/****
/////
* Compute Runge-Kutta coefficients
// Compute Runge-Kutta coefficients
*/
RealType
tau_3
=
currentTau
/
3.0
;
computeKFunctions
(
u
,
time
,
currentTau
);
/////
// k1
this
->
problem
->
getExplicitUpdate
(
time
,
currentTau
,
_u
,
_k1
);
/////
// k2
kAux
=
u
+
currentTau
*
(
1.0
/
3.0
*
k1
);
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
_kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
currentTau
,
_kAux
,
_k2
);
/////
// k3
kAux
=
u
+
currentTau
*
1.0
/
6.0
*
(
k1
+
k2
);
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
_kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
currentTau
,
_kAux
,
_k3
);
/////
// k4
kAux
=
u
+
currentTau
*
(
0.125
*
k1
+
0.375
*
k3
);
this
->
problem
->
applyBoundaryConditions
(
time
+
0.5
*
currentTau
,
_kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
0.5
*
currentTau
,
currentTau
,
_kAux
,
_k4
);
/////
// k5
kAux
=
u
+
currentTau
*
(
0.5
*
k1
-
1.5
*
k3
+
2.0
*
k4
);
this
->
problem
->
applyBoundaryConditions
(
time
+
currentTau
,
_kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
currentTau
,
currentTau
,
_kAux
,
_k5
);
if
(
this
->
testingMode
)
if
(
this
->
testingMode
)
writeGrids
(
u
);
writeGrids
(
_
u
);
/****
/////
* Compute an error of the approximation.
// Compute an error of the approximation.
*/
RealType
error
(
0.0
);
RealType
eps
(
0.0
);
if
(
adaptivity
!=
0.0
)
if
(
adaptivity
!=
0.0
)
eps
=
computeError
(
currentTau
);
{
const
RealType
localError
=
max
(
currentTau
/
3.0
*
abs
(
0.2
*
k1
-
0.9
*
k3
+
0.8
*
k4
-
0.1
*
k5
)
);
Problem
::
CommunicatorType
::
Allreduce
(
&
localError
,
&
error
,
1
,
MPI_MAX
,
Problem
::
CommunicatorType
::
AllGroup
);
}
if
(
adaptivity
==
0.0
||
e
ps
<
adaptivity
)
if
(
adaptivity
==
0.0
||
e
rror
<
adaptivity
)
{
{
RealType
lastResidue
=
this
->
getResidue
();
RealType
lastResidue
=
this
->
getResidue
();
RealType
newResidue
(
0.0
);
RealType
newResidue
(
0.0
);
time
+=
currentTau
;
time
+=
currentTau
;
computeNewTimeLevel
(
time
,
currentTau
,
u
,
newResidue
);
this
->
setResidue
(
newResidue
);
auto
reduction
=
[]
__cuda_callable__
(
RealType
&
a
,
const
RealType
&
b
)
{
a
+=
b
;
};
auto
volatileReduction
=
[]
__cuda_callable__
(
volatile
RealType
&
a
,
const
volatile
RealType
&
b
)
{
a
+=
b
;
};
/****
this
->
setResidue
(
addAndReduceAbs
(
u
,
currentTau
/
6.0
*
(
k1
+
4.0
*
k4
+
k5
),
* When time is close to stopTime the new residue
reduction
,
volatileReduction
,
(
RealType
)
0.0
)
/
(
currentTau
*
(
RealType
)
u
.
getSize
()
)
);
* may be inaccurate significantly.
*/
/////
// When time is close to stopTime the new residue
// may be inaccurate significantly.
if
(
abs
(
time
-
this
->
stopTime
)
<
1.0e-7
)
this
->
setResidue
(
lastResidue
);
if
(
abs
(
time
-
this
->
stopTime
)
<
1.0e-7
)
this
->
setResidue
(
lastResidue
);
...
@@ -209,12 +247,11 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
...
@@ -209,12 +247,11 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
}
}
this
->
refreshSolverMonitor
();
this
->
refreshSolverMonitor
();
/****
/////
* Compute the new time step.
// Compute the new time step.
*/
if
(
adaptivity
!=
0.0
&&
error
!=
0.0
)
if
(
adaptivity
!=
0.0
&&
eps
!=
0.0
)
{
{
currentTau
*=
0.8
*
::
pow
(
adaptivity
/
e
ps
,
0.2
);
currentTau
*=
0.8
*
::
pow
(
adaptivity
/
e
rror
,
0.2
);
currentTau
=
min
(
currentTau
,
this
->
getMaxTau
()
);
currentTau
=
min
(
currentTau
,
this
->
getMaxTau
()
);
#ifdef USE_MPI
#ifdef USE_MPI
TNLMPI
::
Bcast
(
currentTau
,
1
,
0
);
TNLMPI
::
Bcast
(
currentTau
,
1
,
0
);
...
@@ -225,11 +262,8 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
...
@@ -225,11 +262,8 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
else
this
->
tau
=
currentTau
;
else
this
->
tau
=
currentTau
;
/****
/////
* Check stop conditions.
// Check stop conditions.
*/
//cerr << "residue = " << residue << std::endl;
//cerr << "this->getConvergenceResidue() = " << this->getConvergenceResidue() << std::endl;
if
(
time
>=
this
->
getStopTime
()
||
if
(
time
>=
this
->
getStopTime
()
||
(
this
->
getConvergenceResidue
()
!=
0.0
&&
this
->
getResidue
()
<
this
->
getConvergenceResidue
()
)
)
(
this
->
getConvergenceResidue
()
!=
0.0
&&
this
->
getResidue
()
<
this
->
getConvergenceResidue
()
)
)
{
{
...
@@ -243,126 +277,57 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
...
@@ -243,126 +277,57 @@ bool Merson< Problem > :: solve( DofVectorPointer& u )
};
};
template
<
typename
Problem
>
template
<
typename
Problem
>
void
Merson
<
Problem
>::
computeKFunctions
(
DofVectorPointer
&
u
,
void
Merson
<
Problem
>::
computeKFunctions
(
DofVectorPointer
&
_
u
,
const
RealType
&
time
,
const
RealType
&
time
,
RealType
tau
)
RealType
tau
)
{
{
IndexType
size
=
u
->
getSize
();
auto
k1
=
_k1
->
getView
();
auto
k2
=
_k2
->
getView
();
RealType
*
_k1
=
k1
->
getData
();
auto
k3
=
_k3
->
getView
();
RealType
*
_k2
=
k2
->
getData
();
auto
k4
=
_k4
->
getView
();
RealType
*
_k3
=
k3
->
getData
();
auto
k5
=
_k5
->
getView
();
RealType
*
_k4
=
k4
->
getData
();
auto
kAux
=
_kAux
->
getView
();
RealType
*
_k5
=
k5
->
getData
();
auto
u
=
_u
->
getView
();
RealType
*
_kAux
=
kAux
->
getData
();
RealType
*
_u
=
u
->
getData
();
RealType
tau_3
=
tau
/
3.0
;
RealType
tau_3
=
tau
/
3.0
;
/////
if
(
std
::
is_same
<
DeviceType
,
Devices
::
Host
>::
value
)
// k1
{
this
->
problem
->
getExplicitUpdate
(
time
,
tau
,
_u
,
_k1
);
this
->
problem
->
getExplicitUpdate
(
time
,
tau
,
u
,
k1
);
/////
#ifdef HAVE_OPENMP
// k2
#pragma omp parallel for firstprivate( size, _kAux, _u, _k1, tau, tau_3 ) if( Devices::Host::isOMPEnabled() )
kAux
=
u
+
tau
*
(
1.0
/
3.0
*
k1
);
#endif
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
_kAux
);
for
(
IndexType
i
=
0
;
i
<
size
;
i
++
)
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
_kAux
,
_k2
);
_kAux
[
i
]
=
_u
[
i
]
+
tau
*
(
1.0
/
3.0
*
_k1
[
i
]
);
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
kAux
);
/////
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
kAux
,
k2
);
// k3
kAux
=
u
+
tau
*
1.0
/
6.0
*
(
k1
+
k2
);
#ifdef HAVE_OPENMP
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
_kAux
);
#pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k2, tau, tau_3 ) if( Devices::Host::isOMPEnabled() )
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
_kAux
,
_k3
);
#endif
for
(
IndexType
i
=
0
;
i
<
size
;
i
++
)
/////
_kAux
[
i
]
=
_u
[
i
]
+
tau
*
1.0
/
6.0
*
(
_k1
[
i
]
+
_k2
[
i
]
);
// k4
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
kAux
);
kAux
=
u
+
tau
*
(
0.125
*
k1
+
0.375
*
k3
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
kAux
,
k3
);
this
->
problem
->
applyBoundaryConditions
(
time
+
0.5
*
tau
,
_kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
0.5
*
tau
,
tau
,
_kAux
,
_k4
);
#ifdef HAVE_OPENMP
#pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, tau, tau_3 ) if( Devices::Host::isOMPEnabled() )
/////
#endif
// k5
for
(
IndexType
i
=
0
;
i
<
size
;
i
++
)
kAux
=
u
+
tau
*
(
0.5
*
k1
-
1.5
*
k3
+
2.0
*
k4
);
_kAux
[
i
]
=
_u
[
i
]
+
tau
*
(
0.125
*
_k1
[
i
]
+
0.375
*
_k3
[
i
]
);
this
->
problem
->
applyBoundaryConditions
(
time
+
tau
,
_kAux
);
this
->
problem
->
applyBoundaryConditions
(
time
+
0.5
*
tau
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau
,
tau
,
_kAux
,
_k5
);
this
->
problem
->
getExplicitUpdate
(
time
+
0.5
*
tau
,
tau
,
kAux
,
k4
);
#ifdef HAVE_OPENMP
#pragma omp parallel for firstprivate( size, _kAux, _u, _k1, _k3, _k4, tau, tau_3 ) if( Devices::Host::isOMPEnabled() )
#endif
for
(
IndexType
i
=
0
;
i
<
size
;
i
++
)
_kAux
[
i
]
=
_u
[
i
]
+
tau
*
(
0.5
*
_k1
[
i
]
-
1.5
*
_k3
[
i
]
+
2.0
*
_k4
[
i
]
);
this
->
problem
->
applyBoundaryConditions
(
time
+
tau
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau
,
tau
,
kAux
,
k5
);
}
if
(
std
::
is_same
<
DeviceType
,
Devices
::
Cuda
>::
value
)
{
#ifdef HAVE_CUDA
dim3
cudaBlockSize
(
512
);
const
IndexType
cudaBlocks
=
Devices
::
Cuda
::
getNumberOfBlocks
(
size
,
cudaBlockSize
.
x
);
const
IndexType
cudaGrids
=
Devices
::
Cuda
::
getNumberOfGrids
(
cudaBlocks
);
this
->
cudaBlockResidue
.
setSize
(
min
(
cudaBlocks
,
Devices
::
Cuda
::
getMaxGridSize
()
)
);
const
IndexType
threadsPerGrid
=
Devices
::
Cuda
::
getMaxGridSize
()
*
cudaBlockSize
.
x
;
this
->
problem
->
getExplicitUpdate
(
time
,
tau
,
u
,
k1
);
cudaDeviceSynchronize
();
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
const
IndexType
gridOffset
=
gridIdx
*
threadsPerGrid
;
const
IndexType
currentSize
=
min
(
size
-
gridOffset
,
threadsPerGrid
);
computeK2Arg
<<<
cudaBlocks
,
cudaBlockSize
>>>
(
currentSize
,
tau
,
&
_u
[
gridOffset
],
&
_k1
[
gridOffset
],
&
_kAux
[
gridOffset
]
);
}
cudaDeviceSynchronize
();
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
kAux
,
k2
);
cudaDeviceSynchronize
();
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
const
IndexType
gridOffset
=
gridIdx
*
threadsPerGrid
;
const
IndexType
currentSize
=
min
(
size
-
gridOffset
,
threadsPerGrid
);
computeK3Arg
<<<
cudaBlocks
,
cudaBlockSize
>>>
(
currentSize
,
tau
,
&
_u
[
gridOffset
],
&
_k1
[
gridOffset
],
&
_k2
[
gridOffset
],
&
_kAux
[
gridOffset
]
);
}
cudaDeviceSynchronize
();
this
->
problem
->
applyBoundaryConditions
(
time
+
tau_3
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau_3
,
tau
,
kAux
,
k3
);
cudaDeviceSynchronize
();
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
const
IndexType
gridOffset
=
gridIdx
*
threadsPerGrid
;
const
IndexType
currentSize
=
min
(
size
-
gridOffset
,
threadsPerGrid
);
computeK4Arg
<<<
cudaBlocks
,
cudaBlockSize
>>>
(
currentSize
,
tau
,
&
_u
[
gridOffset
],
&
_k1
[
gridOffset
],
&
_k3
[
gridOffset
],
&
_kAux
[
gridOffset
]
);
}
cudaDeviceSynchronize
();
this
->
problem
->
applyBoundaryConditions
(
time
+
0.5
*
tau
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
0.5
*
tau
,
tau
,
kAux
,
k4
);
cudaDeviceSynchronize
();
for
(
IndexType
gridIdx
=
0
;
gridIdx
<
cudaGrids
;
gridIdx
++
)
{
const
IndexType
gridOffset
=
gridIdx
*
threadsPerGrid
;
const
IndexType
currentSize
=
min
(
size
-
gridOffset
,
threadsPerGrid
);
computeK5Arg
<<<
cudaBlocks
,
cudaBlockSize
>>>
(
currentSize
,
tau
,
&
_u
[
gridOffset
],
&
_k1
[
gridOffset
],
&
_k3
[
gridOffset
],
&
_k4
[
gridOffset
],
&
_kAux
[
gridOffset
]
);
}
cudaDeviceSynchronize
();
this
->
problem
->
applyBoundaryConditions
(
time
+
tau
,
kAux
);
this
->
problem
->
getExplicitUpdate
(
time
+
tau
,
tau
,
kAux
,
k5
);
cudaDeviceSynchronize
();
#endif
}
}
}
template
<
typename
Problem
>
/*
template< typename Problem >
typename Problem :: RealType Merson< Problem > :: computeError( const RealType tau )
typename Problem :: RealType Merson< Problem > :: computeError( const RealType tau )
{
{
const
IndexType
size
=
k1
->
getSize
();
const IndexType size =
_
k1->getSize();
const
RealType
*
_
k1
=
k1
->
getData
();
const RealType* k1 =
_
k1->getData();
const
RealType
*
_
k3
=
k3
->
getData
();
const RealType* k3 =
_
k3->getData();
const
RealType
*
_
k4
=
k4
->
getData
();
const RealType* k4 =
_
k4->getData();
const
RealType
*
_
k5
=
k5
->
getData
();
const RealType* k5 =
_
k5->getData();
RealType
*
_
kAux
=
kAux
->
getData
();
RealType* kAux =
_
kAux->getData();
RealType eps( 0.0 ), maxEps( 0.0 );
RealType eps( 0.0 ), maxEps( 0.0 );
if( std::is_same< DeviceType, Devices::Host >::value )
if( std::is_same< DeviceType, Devices::Host >::value )
...
@@ -379,10 +344,10 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
...
@@ -379,10 +344,10 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
for( IndexType i = 0; i < size; i ++ )
for( IndexType i = 0; i < size; i ++ )
{
{
RealType err = ( RealType ) ( tau / 3.0 *
RealType err = ( RealType ) ( tau / 3.0 *
abs
(
0.2
*
_
k1
[
i
]
+
abs( 0.2 * k1[ i ] +
-
0.9
*
_
k3
[
i
]
+
-0.9 * k3[ i ] +
0.8
*
_
k4
[
i
]
+
0.8 * k4[ i ] +
-
0.1
*
_
k5
[
i
]
)
);
-0.1 * k5[ i ] ) );
localEps = max( localEps, err );
localEps = max( localEps, err );
}
}
this->openMPErrorEstimateBuffer[ Devices::Host::getThreadIdx() ] = localEps;
this->openMPErrorEstimateBuffer[ Devices::Host::getThreadIdx() ] = localEps;
...
@@ -404,13 +369,13 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
...
@@ -404,13 +369,13 @@ typename Problem :: RealType Merson< Problem > :: computeError( const RealType t
const IndexType currentSize = min( size - gridOffset, threadsPerGrid );
const IndexType currentSize = min( size - gridOffset, threadsPerGrid );
computeErrorKernel<<< cudaBlocks, cudaBlockSize >>>( currentSize,
computeErrorKernel<<< cudaBlocks, cudaBlockSize >>>( currentSize,
tau,
tau,
&
_
k1
[
gridOffset
],
&k1[ gridOffset ],
&
_
k3
[
gridOffset
],
&k3[ gridOffset ],
&
_
k4
[
gridOffset
],
&k4[ gridOffset ],
&
_
k5
[
gridOffset
],
&k5[ gridOffset ],
&
_
kAux
[
gridOffset
]
);
&kAux[ gridOffset ] );
cudaDeviceSynchronize();
cudaDeviceSynchronize();
eps
=
std
::
max
(
eps
,
kAux
->
max
()
);
eps = std::max( eps,
_
kAux->max() );
}
}
#endif
#endif
}
}
...
@@ -425,20 +390,20 @@ void Merson< Problem >::computeNewTimeLevel( const RealType time,
...
@@ -425,20 +390,20 @@ void Merson< Problem >::computeNewTimeLevel( const RealType time,
RealType& currentResidue )
RealType& currentResidue )
{
{
RealType localResidue = RealType( 0.0 );
RealType localResidue = RealType( 0.0 );
IndexType
size
=
k1
->
getSize
();
IndexType size =
_
k1->getSize();
RealType* _u = u->getData();
RealType* _u = u->getData();
RealType
*
_
k1
=
k1
->
getData
();
RealType* k1 =
_
k1->getData();
RealType
*
_
k4
=
k4
->
getData
();
RealType* k4 =
_
k4->getData();
RealType
*
_
k5
=
k5
->
getData
();
RealType* k5 =
_
k5->getData();
if( std::is_same< DeviceType, Devices::Host >::value )
if( std::is_same< DeviceType, Devices::Host >::value )
{
{
#ifdef HAVE_OPENMP
#ifdef HAVE_OPENMP
#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u,
_
k1,
_
k4,
_
k5, tau ) if( Devices::Host::isOMPEnabled() )
#pragma omp parallel for reduction(+:localResidue) firstprivate( size, _u, k1, k4, k5, tau ) if( Devices::Host::isOMPEnabled() )
#endif
#endif
for( IndexType i = 0; i < size; i ++ )
for( IndexType i = 0; i < size; i ++ )
{
{
const
RealType
add
=
tau
/
6.0
*
(
_
k1
[
i
]
+
4.0
*
_
k4
[
i
]
+
_
k5
[
i
]
);
const RealType add = tau / 6.0 * ( k1[ i ] + 4.0 * k4[ i ] + k5[ i ] );
_u[ i ] += add;
_u[ i ] += add;
localResidue += abs( ( RealType ) add );
localResidue += abs( ( RealType ) add );
}
}
...
@@ -462,9 +427,9 @@ void Merson< Problem >::computeNewTimeLevel( const RealType time,
...
@@ -462,9 +427,9 @@ void Merson< Problem >::computeNewTimeLevel( const RealType time,
updateUMerson<<< cudaBlocks, cudaBlockSize, sharedMemory >>>( currentSize,
updateUMerson<<< cudaBlocks, cudaBlockSize, sharedMemory >>>( currentSize,
tau,
tau,
&
_
k1
[
gridOffset
],
&k1[ gridOffset ],
&
_
k4
[
gridOffset
],
&k4[ gridOffset ],
&
_
k5
[
gridOffset
],
&k5[ gridOffset ],
&_u[ gridOffset ],
&_u[ gridOffset ],
this->cudaBlockResidue.getData() );
this->cudaBlockResidue.getData() );
localResidue += this->cudaBlockResidue.sum();
localResidue += this->cudaBlockResidue.sum();
...
@@ -490,17 +455,17 @@ void Merson< Problem >::writeGrids( const DofVectorPointer& u )
...
@@ -490,17 +455,17 @@ void Merson< Problem >::writeGrids( const DofVectorPointer& u )
{
{
std
::
cout
<<
"Writing Merson solver grids ..."
;
std
::
cout
<<
"Writing Merson solver grids ..."
;
File
(
"Merson-u.tnl"
,
std
::
ios_base
::
out
)
<<
*
u
;
File
(
"Merson-u.tnl"
,
std
::
ios_base
::
out
)
<<
*
u
;
File
(
"Merson-k1.tnl"
,
std
::
ios_base
::
out
)
<<
*
k1
;
File
(
"Merson-k1.tnl"
,
std
::
ios_base
::
out
)
<<
*
_
k1
;
File
(
"Merson-k2.tnl"
,
std
::
ios_base
::
out
)
<<
*
k2
;
File
(
"Merson-k2.tnl"
,
std
::
ios_base
::
out
)
<<
*
_
k2
;
File
(
"Merson-k3.tnl"
,
std
::
ios_base
::
out
)
<<
*
k3
;
File
(
"Merson-k3.tnl"
,
std
::
ios_base
::
out
)
<<
*
_
k3
;
File
(
"Merson-k4.tnl"
,
std
::
ios_base
::
out
)
<<
*
k4
;
File
(
"Merson-k4.tnl"
,
std
::
ios_base
::
out
)
<<
*
_
k4
;
File
(
"Merson-k5.tnl"
,
std
::
ios_base
::
out
)
<<
*
k5
;
File
(
"Merson-k5.tnl"
,
std
::
ios_base
::
out
)
<<
*
_
k5
;
std
::
cout
<<
" done. PRESS A KEY."
<<
std
::
endl
;
std
::
cout
<<
" done. PRESS A KEY."
<<
std
::
endl
;
getchar
();
getchar
();
}
}
#ifdef HAVE_CUDA
#ifdef HAVE_CUDA
/*
template< typename RealType, typename Index >
template< typename RealType, typename Index >
__global__ void computeK2Arg( const Index size,
__global__ void computeK2Arg( const Index size,
const RealType tau,
const RealType tau,
...
@@ -552,6 +517,7 @@ __global__ void computeK5Arg( const Index size,
...
@@ -552,6 +517,7 @@ __global__ void computeK5Arg( const Index size,
if( i < size )
if( i < size )
k5_arg[ i ] = u[ i ] + tau * ( 0.5 * k1[ i ] - 1.5 * k3[ i ] + 2.0 * k4[ i ] );
k5_arg[ i ] = u[ i ] + tau * ( 0.5 * k1[ i ] - 1.5 * k3[ i ] + 2.0 * k4[ i ] );
}
}
*/
template
<
typename
RealType
,
typename
Index
>
template
<
typename
RealType
,
typename
Index
>
__global__
void
computeErrorKernel
(
const
Index
size
,
__global__
void
computeErrorKernel
(
const
Index
size
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment