Commit b90aafc0 (TNL / GPUSort)
Authored 3 years ago by Xuan Thang Nguyen

    write new task

Parent: bdc0cb2f

1 changed file: src/quicksort/quicksort.cuh (+83 additions, −264 deletions)
@@ -3,154 +3,85 @@
 #include <TNL/Containers/Array.h>
 #include "../util/reduction.cuh"
 #include "task.h"
 #include "cudaPartition.cuh"
 #include "../bitonicSort/bitonicSort.h"
 #include <iostream>

-#define deb(x) std::cout << #x << " = " << x << std::endl;
-
-using CudaArrayView = TNL::Containers::ArrayView<int, TNL::Devices::Cuda>;
-using CudaTaskArray = TNL::Containers::Array<TASK, TNL::Devices::Cuda>;
+using namespace TNL;
+using namespace TNL::Containers;

-__device__ void cmpElem(CudaArrayView arr, int myBegin, int myEnd,
-                        int &smaller, int &bigger, volatile int pivot)
-{
-    for (int i = myBegin + threadIdx.x; i < myEnd; i += blockDim.x)
-    {
-        int data = arr[i];
-        if (data < pivot)
-            smaller++;
-        else if (data > pivot)
-            bigger++;
-    }
-}
+//-----------------------------------------------------------
+__device__ void writeNewTask(int begin, int end,
+                             ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt,
+                             ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt)
+{
+    int size = end - begin;
+    if (size == 0)
+        return;
+
+    if (size <= blockDim.x * 2)
+    {
+        int idx = atomicAdd(secondPhaseTasksCnt, 1);
+        secondPhaseTasks[idx] = TASK(begin, end);
+    }
+    else
+    {
+        int idx = atomicAdd(newTasksCnt, 1);
+        newTasks[idx] = TASK(begin, end);
+    }
+}

-__device__ void copyData(CudaArrayView arr, int myBegin, int myEnd,
-                         CudaArrayView aux, int smallerStart, int biggerStart, volatile int pivot)
-{
-    for (int i = myBegin + threadIdx.x; i < myEnd; i += blockDim.x)
-    {
-        int data = arr[i];
-        if (data < pivot)
-            aux[smallerStart++] = data;
-        else if (data > pivot)
-            aux[biggerStart++] = data;
-    }
-}
+__device__ void writeNewTasks(int leftBegin, int leftEnd, int rightBegin, int rightEnd,
+                              ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt,
+                              ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt)
+{
+    writeNewTask(leftBegin, leftEnd, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt);
+    writeNewTask(rightBegin, rightEnd, newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt);
+}

 //----------------------------------------------------
 template <typename Function>
-__global__ void cudaPartition(CudaArrayView arr, const Function &Cmp, CudaArrayView aux,
-                              TNL::Containers::ArrayView<int, TNL::Devices::Cuda> cuda_blockToTaskMapping,
-                              int elemPerBlock,
-                              TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda> cuda_tasks,
-                              TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda> cuda_newTasks, int *newTasksCnt,
-                              TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda> cuda_2ndPhaseTasks, int *cuda_2ndPhaseCnt)
+__global__ void cudaQuickSort1stPhase(ArrayView<int, Devices::Cuda> src, ArrayView<int, Devices::Cuda> dst,
+                                      const Function &Cmp, int elemPerBlock,
+                                      ArrayView<TASK, Devices::Cuda> tasks,
+                                      ArrayView<int, Devices::Cuda> taskMapping, int *tasksAmount,
+                                      ArrayView<TASK, Devices::Cuda> newTasks, int *newTasksCnt,
+                                      ArrayView<TASK, Devices::Cuda> secondPhaseTasks, int *secondPhaseTasksCnt)
 {
-    static __shared__ TASK myTask;
-    static __shared__ int smallerStart, biggerStart;
-    static __shared__ int pivot;
-    static __shared__ int myTaskIdx;
-    static __shared__ bool writePivot;
+    TASK &myTask = tasks[taskMapping[blockIdx.x]];

-    if (threadIdx.x == 0)
-    {
-        myTaskIdx = cuda_blockToTaskMapping[blockIdx.x];
-        myTask = cuda_tasks[myTaskIdx];
-        pivot = arr[myTask.arrEnd - 1];
-        writePivot = false;
-    }
+    pivot = src[myTask.partitionEnd - 1];
     __syncthreads();

     //only works if consecutive blocks work on the same task
-    const int myBegin = myTask.arrBegin + elemPerBlock * (blockIdx.x - myTask.firstBlock);
-    const int myEnd = TNL::min(myTask.arrEnd, myBegin + elemPerBlock);
-
-    //-------------------------------------------------------------------------
-    int smaller = 0, bigger = 0;
-    cmpElem(arr, myBegin, myEnd, smaller, bigger, pivot);
-
-    int smallerOffset = blockInclusivePrefixSum(smaller);
-    int biggerOffset = blockInclusivePrefixSum(bigger);
-
-    if (threadIdx.x == blockDim.x - 1) //last thread in block has sum of all values
-    {
-        smallerStart = atomicAdd(&(cuda_tasks[myTaskIdx].auxBeginIdx), smallerOffset);
-        biggerStart = atomicAdd(&(cuda_tasks[myTaskIdx].auxEndIdx), -biggerOffset) - biggerOffset;
-    }
-    __syncthreads();
-
-    int destSmaller = smallerStart + smallerOffset - smaller;
-    int destBigger = biggerStart + biggerOffset - bigger;
-    copyData(arr, myBegin, myEnd, aux, destSmaller, destBigger, pivot);

     //-----------------------------------------------------------
+    bool isLast = cudaPartition(src.getView(myTask.partitionBegin, myTask.partitionEnd),
+                                dst.getView(myTask.partitionBegin, myTask.partitionEnd),
+                                Cmp, pivot, elemPerBlock, myTask);
+    if (!isLast)
+        return;

-    if (threadIdx.x == 0 && atomicAdd(&(cuda_tasks[myTaskIdx].stillWorkingCnt), -1) == 1)
-    {
-        writePivot = true;
-        myTask = cuda_tasks[myTaskIdx]; //update auxBeginIdx, auxEndIdx value
-    }
     __syncthreads();
+    myTask = tasks[taskMapping[blockIdx.x]];

-    if (!writePivot)
-        return;
+    int leftBegin = myTask.partitionBegin, leftEnd = myTask.partitionBegin + myTask.dstBegin;
+    int rightBegin = myTask.partitionBegin + myTask.dstEnd, rightEnd = myTask.partitionEnd;

-    for (int i = myTask.auxBeginIdx + threadIdx.x; i < myTask.auxEndIdx; i += blockDim.x)
-        aux[i] = pivot;
+    for (int i = leftEnd + threadIdx.x; i < rightBegin; i += blockDim.x)
+        src[i] = dst[i] = pivot;

     if (threadIdx.x != 0)
         return;

-    if (myTask.auxBeginIdx - myTask.arrBegin > 0) //smaller
-    {
-        if (myTask.auxBeginIdx - myTask.arrBegin <= blockDim.x * 2)
-        {
-            int newTaskIdx = atomicAdd(cuda_2ndPhaseCnt, 1);
-            cuda_2ndPhaseTasks[newTaskIdx] = TASK(myTask.arrBegin, myTask.auxBeginIdx, myTask.arrBegin, myTask.auxBeginIdx);
-        }
-        else
-        {
-            int newTaskIdx = atomicAdd(newTasksCnt, 1);
-            cuda_newTasks[newTaskIdx] = TASK(myTask.arrBegin, myTask.auxBeginIdx, myTask.arrBegin, myTask.auxBeginIdx);
-        }
-    }
-
-    if (myTask.arrEnd - myTask.auxEndIdx > 0) //greater
-    {
-        if (myTask.arrEnd - myTask.auxEndIdx <= blockDim.x * 2)
-        {
-            int newTaskIdx = atomicAdd(cuda_2ndPhaseCnt, 1);
-            cuda_2ndPhaseTasks[newTaskIdx] = TASK(myTask.auxEndIdx, myTask.arrEnd, myTask.auxEndIdx, myTask.arrEnd);
-        }
-        else
-        {
-            int newTaskIdx = atomicAdd(newTasksCnt, 1);
-            cuda_newTasks[newTaskIdx] = TASK(myTask.auxEndIdx, myTask.arrEnd, myTask.auxEndIdx, myTask.arrEnd);
-        }
-    }
+    writeNewTasks(leftBegin, leftEnd, rightBegin, rightEnd,
+                  newTasks, newTasksCnt, secondPhaseTasks, secondPhaseTasksCnt);
 }
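For context, a minimal host-side sketch (not part of this commit) of the offset arithmetic behind the partition step shown above: each thread counts its elements smaller than the pivot, blockInclusivePrefixSum gives every thread its running total, the last thread of the block (whose inclusive sum equals the block total) reserves a contiguous output range with atomicAdd, and each thread then writes starting at reservedStart + inclusiveSum - ownCount. The serial loop below stands in for the block-wide prefix sum, and the reserved start value is made up for illustration.

#include <cstdio>
#include <vector>

int main()
{
    // Per-thread counts of elements smaller than the pivot (toy numbers).
    std::vector<int> smaller = {3, 0, 2, 4};

    // Inclusive prefix sum over the "threads", mimicking blockInclusivePrefixSum.
    std::vector<int> inclusive(smaller.size());
    int running = 0;
    for (std::size_t t = 0; t < smaller.size(); ++t)
        inclusive[t] = (running += smaller[t]);

    // The last thread sees the block total (running) and would reserve a range
    // with atomicAdd; pretend the returned start of that range is 100.
    int smallerStart = 100;

    // Each thread's first write position: reserved start + exclusive prefix sum.
    for (std::size_t t = 0; t < smaller.size(); ++t)
        std::printf("thread %zu writes %d elements starting at index %d\n",
                    t, smaller[t], smallerStart + inclusive[t] - smaller[t]);
    return 0;
}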
-__global__ void cudaInitTask(TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda> cuda_tasks,
+//-----------------------------------------------------------
+__global__ void cudaInitTask(ArrayView<TASK, Devices::Cuda> cuda_tasks,
                              int taskAmount, int elemPerBlock, int *firstAvailBlock,
-                             TNL::Containers::ArrayView<int, TNL::Devices::Cuda> cuda_blockToTaskMapping)
+                             ArrayView<int, Devices::Cuda> cuda_blockToTaskMapping)
 {
     static __shared__ int avail;
@@ -181,153 +112,43 @@ __global__ void cudaInitTask(TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda
     }
 }

 //-----------------------------------------------------------
-template <typename Function>
-__device__ void cudaQuickSort_block(CudaArrayView arr, const Function &Cmp, CudaArrayView aux,
-                                    int *stackArrBegin, int *stackArrEnd, int stackSize, int *bitonicMem)
-{
-    static __shared__ int begin, end;
-    static __shared__ int stackTop;
-    static __shared__ int pivotBegin, pivotEnd;
-    static __shared__ int pivot;
-
-    if (threadIdx.x == 0)
-    {
-        stackArrBegin[0] = 0;
-        stackArrEnd[0] = arr.getSize();
-        stackTop = 1;
-    }
-    __syncthreads();
-
-    while (stackTop > 0)
-    {
-        if (threadIdx.x == 0)
-        {
-            begin = stackArrBegin[stackTop - 1];
-            end = stackArrEnd[stackTop - 1];
-            stackTop--;
-            pivot = arr[end - 1];
-        }
-        __syncthreads();
-
-        int size = end - begin;
-        if (size <= blockDim.x * 2)
-        {
-            bitonicSort_Block(arr.getView(begin, end), arr.getView(begin, end), bitonicMem, Cmp);
-            continue;
-        }
-
-        int smaller = 0, bigger = 0;
-        cmpElem(arr, begin, end, smaller, bigger, pivot);
-
-        int smallerOffset = blockInclusivePrefixSum(smaller);
-        int biggerOffset = blockInclusivePrefixSum(bigger);
-
-        if (threadIdx.x == blockDim.x - 1)
-        {
-            pivotBegin = begin + smallerOffset;
-            pivotEnd = end - biggerOffset;
-        }
-        __syncthreads();
-
-        int destSmaller = smallerOffset - smaller;
-        int destBigger = pivotEnd + biggerOffset - bigger;
-        copyData(arr, begin, end, aux, destSmaller, destBigger, pivot);
-        __syncthreads();
-
-        for (int i = begin + threadIdx.x; i < end; i += blockDim.x)
-        {
-            if (i >= pivotBegin && i < pivotEnd)
-                arr[i] = pivot;
-            else
-                arr[i] = aux[i];
-        }
-
-        if (threadIdx.x == 0)
-        {
-            if (pivotBegin - begin > 1) //left from pivot are smaller elems
-            {
-                stackArrBegin[stackTop] = begin;
-                stackArrEnd[stackTop] = pivotBegin;
-                stackTop++;
-            }
-            if (end - pivotEnd > 1) //right from pivot until end are elem greater than pivot
-            {
-                stackArrBegin[stackTop] = pivotEnd;
-                stackArrEnd[stackTop] = end;
-                stackTop++;
-            }
-        }
-        __syncthreads();
-    }
-}
-
-template <typename Function>
-__global__ void cudaQuickSort(CudaArrayView arr, const Function &Cmp, CudaArrayView aux,
-                              int stackSize, TNL::Containers::ArrayView<TASK, TNL::Devices::Cuda> cuda_tasks)
-{
-    extern __shared__ int externMem[];
-    static __shared__ TASK task;
-
-    if (threadIdx.x == 0)
-        task = cuda_tasks[blockIdx.x];
-    __syncthreads();
-
-    int *bitonicMem = externMem;
-    int *stackLeft = bitonicMem + (2 * blockDim.x);
-    int *stackRight = stackLeft + (stackSize / 2);
-
-    cudaQuickSort_block(arr.getView(task.arrBegin, task.arrEnd), Cmp,
-                        aux.getView(task.auxBeginIdx, task.auxEndIdx),
-                        stackLeft, stackRight, stackSize / 2, bitonicMem);
-}

 //-----------------------------------------------------------
 //-----------------------------------------------------------
 const int threadsPerBlock = 512, maxBlocks = 1 << 15; //32k
 const int maxTasks = 1 << 10;
 const int minElemPerBlock = threadsPerBlock * 2;

 class QUICKSORT
 {
-    CudaArrayView arr;
-    TNL::Containers::Array<int, TNL::Devices::Cuda> aux;
+    ArrayView<int, Devices::Cuda> arr;
+    Array<int, Devices::Cuda> aux;

-    CudaTaskArray cuda_tasks, cuda_newTasks, cuda_2ndPhaseTasks;
+    Array<TASK, Devices::Cuda> cuda_tasks, cuda_newTasks, cuda_2ndPhaseTasks;

-    TNL::Containers::Array<int, TNL::Devices::Cuda> cuda_newTasksAmount, cuda_2ndPhaseTasksAmount; //is in reality 1 integer
+    Array<int, Devices::Cuda> cuda_newTasksAmount, cuda_2ndPhaseTasksAmount; //is in reality 1 integer
     int tasksAmount; //counter for Host == cuda_newTasksAmount
     int totalTask;   // cuda_newTasksAmount + cuda_2ndPhaseTasksAmount

-    TNL::Containers::Array<int, TNL::Devices::Cuda> cuda_blockToTaskMapping;
-    TNL::Containers::Array<int, TNL::Devices::Cuda> cuda_blockToTaskMapping_Cnt; //is in reality 1 integer
+    Array<int, Devices::Cuda> cuda_blockToTaskMapping;
+    Array<int, Devices::Cuda> cuda_blockToTaskMapping_Cnt; //is in reality 1 integer

     int iteration = 0;

     //--------------------------------------------------------------------------------------
 public:
-    QUICKSORT(CudaArrayView _arr)
+    QUICKSORT(ArrayView<int, Devices::Cuda> _arr)
         : arr(_arr), aux(arr.getSize()),
           cuda_tasks(maxBlocks), cuda_newTasks(maxBlocks), cuda_2ndPhaseTasks(maxBlocks),
           cuda_newTasksAmount(1), cuda_2ndPhaseTasksAmount(1),
           cuda_blockToTaskMapping(maxBlocks), cuda_blockToTaskMapping_Cnt(1)
     {
-        cuda_tasks.setElement(0, TASK(0, arr.getSize(), 0, arr.getSize()));
+        cuda_tasks.setElement(0, TASK(0, arr.getSize()));
         totalTask = tasksAmount = 1;
         cuda_2ndPhaseTasksAmount = 0;
     }

     template <typename Function>
     void sort(const Function &Cmp)
     {
         while (tasksAmount > 0 && totalTask < maxTasks)
         {
@@ -338,31 +159,29 @@ public:
             {
                 cudaPartition<<<blocksCnt, threadsPerBlock>>>(
                     arr, Cmp, aux.getView(),
                     cuda_blockToTaskMapping.getView(), elemPerBlock,
                     cuda_tasks.getView(),
                     cuda_newTasks.getView(), cuda_newTasksAmount.getData(),
                     cuda_2ndPhaseTasks.getView(), cuda_2ndPhaseTasksAmount.getData());
             }
             else
             {
                 cudaPartition<<<blocksCnt, threadsPerBlock>>>(
                     arr, Cmp, aux.getView(),
                     cuda_blockToTaskMapping.getView(), elemPerBlock,
                     cuda_newTasks.getView(),
                     cuda_tasks.getView(), //swapped order to write back and forth without copying
                     cuda_newTasksAmount.getData(),
                     cuda_2ndPhaseTasks.getView(), cuda_2ndPhaseTasksAmount.getData());
             }

             tasksAmount = processNewTasks();
             iteration++;
         }
-    }
+        _2ndPhase(Cmp);
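The swapped cuda_tasks/cuda_newTasks arguments above are a ping-pong scheme: the two task arrays alternate between producer and consumer roles on even and odd iterations, so no copy-back is needed between passes (the same parity shows up later as iteration % 2 in _2ndPhase). A minimal host-side sketch of the idea, with hypothetical Task/partitionPass stand-ins that are not part of the file:

#include <utility>
#include <vector>

struct Task { int begin, end; };

// Hypothetical stand-in for one partition pass: consumes tasks from `in`,
// appends the follow-up tasks to `out`.
static void partitionPass(const std::vector<Task> &in, std::vector<Task> &out)
{
    out.clear();
    for (const Task &t : in)
        if (t.end - t.begin > 1)
            out.push_back({t.begin, t.begin + (t.end - t.begin) / 2}); // toy split
}

int main()
{
    std::vector<Task> a = {{0, 1024}}, b;
    std::vector<Task> *src = &a, *dst = &b;

    for (int iteration = 0; iteration < 4 && !src->empty(); ++iteration)
    {
        partitionPass(*src, *dst);
        std::swap(src, dst); // swap roles instead of copying dst back into src
    }
    return 0;
}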
@@ -427,21 +246,21 @@ public:
         return tasksAmount;
     }

     template <typename Function>
     void _2ndPhase(const Function &Cmp)
     {
         if (totalTask == 0)
             return;

         TNL::Algorithms::MultiDeviceMemoryOperations<TNL::Devices::Cuda, TNL::Devices::Cuda>::
             copy(cuda_2ndPhaseTasks.getData() + (totalTask - tasksAmount),
                  (iteration % 2 ? cuda_newTasks.getData() : cuda_tasks.getData()),
                  tasksAmount);

         int blocks = totalTask;

         int stackSize = 128, stackMem = stackSize * sizeof(int);
         int bitonicMem = threadsPerBlock * 2 * sizeof(int);
         int auxMem = stackMem + bitonicMem;

         cudaQuickSort<<<blocks, threadsPerBlock, auxMem>>>(arr, Cmp, aux.getView(),
                                                            stackSize, cuda_2ndPhaseTasks.getView());
     }
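For reference, a sketch (not from the diff) of how the dynamic shared-memory budget computed in _2ndPhase lines up with the pointer carving inside the cudaQuickSort kernel shown earlier; the constants mirror the file, while the standalone constexpr form is illustrative only.

#include <cstddef>

constexpr int threadsPerBlock = 512;
constexpr int stackSize = 128;

// Matches _2ndPhase: scratch for bitonicSort_Block plus the explicit quicksort stack.
constexpr std::size_t bitonicMemBytes = threadsPerBlock * 2 * sizeof(int);
constexpr std::size_t stackMemBytes   = stackSize * sizeof(int);
constexpr std::size_t auxMemBytes     = bitonicMemBytes + stackMemBytes; // third <<<>>> parameter

// Inside the kernel the same budget is split as:
//   int *bitonicMem = externMem;                    // 2 * blockDim.x ints
//   int *stackLeft  = bitonicMem + 2 * blockDim.x;  // stackSize / 2 ints
//   int *stackRight = stackLeft + stackSize / 2;    // stackSize / 2 ints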
@@ -449,14 +268,14 @@ public:
 //-----------------------------------------------------------
 template <typename Function>
-void quicksort(CudaArrayView arr, const Function &Cmp)
+void quicksort(ArrayView<int, Devices::Cuda> arr, const Function &Cmp)
 {
     QUICKSORT sorter(arr);
     sorter.sort(Cmp);
 }

-void quicksort(CudaArrayView arr)
+void quicksort(ArrayView<int, Devices::Cuda> arr)
 {
     quicksort(arr, [] __cuda_callable__ (int a, int b) { return a < b; });
 }
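For reference, a hedged sketch of how the public quicksort() overloads above might be driven from host code. The include path and the sample data are assumptions; only quicksort(), Array, getView(), setElement() and the __cuda_callable__ comparator form come from this file, and compiling the device lambda requires nvcc's extended-lambda support.

#include <TNL/Containers/Array.h>
#include "quicksort.cuh" // assumed relative include path

using namespace TNL;
using namespace TNL::Containers;

int main()
{
    Array<int, Devices::Cuda> data(5);
    data.setElement(0, 4);
    data.setElement(1, 1);
    data.setElement(2, 5);
    data.setElement(3, 2);
    data.setElement(4, 3);

    quicksort(data.getView()); // default ascending comparator

    // Or with an explicit comparator, matching the templated overload:
    quicksort(data.getView(), [] __cuda_callable__ (int a, int b) { return a > b; });

    return 0;
}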