From a290ac2b9025f7363cb38f924feaea0694c451ee Mon Sep 17 00:00:00 2001
From: Tomas Sobotik <sobotto4@fjfi.cvut.cz>
Date: Tue, 14 Apr 2015 13:39:41 +0200
Subject: [PATCH] Minor fixes: time the run, pass unusedCell buffer to initCUDA

---
 examples/hamilton-jacobi-parallel/main.h      |  7 +++++
 .../tnlParallelEikonalSolver.h                |  2 +-
 .../tnlParallelEikonalSolver_impl.h           | 26 +++++++++++++++----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/examples/hamilton-jacobi-parallel/main.h b/examples/hamilton-jacobi-parallel/main.h
index 83ed80e05c..8fe0efdc6c 100644
--- a/examples/hamilton-jacobi-parallel/main.h
+++ b/examples/hamilton-jacobi-parallel/main.h
@@ -21,11 +21,15 @@
 #include <operators/godunov-eikonal/parallelGodunovEikonal.h>
 #include <mesh/tnlGrid.h>
 #include <core/tnlDevice.h>
+#include <time.h>
 
 typedef MainBuildConfig BuildConfig;
 
 int main( int argc, char* argv[] )
 {
+	time_t start;
+	time_t stop;
+	time(&start);
    tnlParameterContainer parameters;
    tnlConfigDescription configDescription;
    parallelEikonalConfig< BuildConfig >::configSetup( configDescription );
@@ -78,6 +82,9 @@ int main( int argc, char* argv[] )
    }
   // }
 
+   time(&stop);
+   cout << endl;
+   cout << "Running time was: " << difftime(stop,start) << endl;
    return EXIT_SUCCESS;
 }
 
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
index fad69121a0..0d6ad90fcc 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
@@ -157,7 +157,7 @@ template <typename SchemeHost, typename SchemeDevice, typename Device>
 __global__ void initRunCUDA(tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* caller);
 
 template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, bool * ptr2);
+__global__ void initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, bool * ptr2, int* ptr3);
 
 template <typename SchemeHost, typename SchemeDevice, typename Device>
 __global__ void synchronizeCUDA(tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
index 049f712957..1f4a001466 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
@@ -123,8 +123,15 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	//double* tmpw;
 	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
 	cudaMalloc(&(this->runcuda), sizeof(bool));
-	initCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda));
 	cudaDeviceSynchronize();
+	checkCudaDevice;
+	int* tmpUC;
+	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
+	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
+
+	initCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
 	//cout << "s " << endl;
 	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
 	double* tmpu = NULL;
@@ -132,6 +139,8 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
 	//printf("%p %p \n",tmpu,tmpw);
 	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
 	//cout << "s "<< endl;
 
 	}
@@ -171,6 +180,8 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	else if(this->device == tnlCudaDevice)
 	{
 //		cout << "pre 1 kernel" << endl;
+		cudaDeviceSynchronize();
+		checkCudaDevice;
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
 		cudaDeviceSynchronize();
@@ -1362,7 +1373,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 __global__
-void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , bool* ptr2)
+void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , bool* ptr2, int* ptr3)
 {
 	//cout << "Initializating solver..." << endl;
 	//const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh");
@@ -1401,7 +1412,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 //	this->gridCols_cuda = gridCols;
 
 	cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double));
-	cudaSolver->unusedCell_cuda = (int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
+	cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
 	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
 	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
 	cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool));
@@ -1413,12 +1424,17 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 
 	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++)
 	{
-		for(int j = 0; j < cudaSolver->n*cudaSolver->n; j++)
-			cudaSolver->unusedCell_cuda[i*cudaSolver->n*cudaSolver->n + j] = 1;
 		cudaSolver->subgridValues_cuda[i] = INT_MAX;
 		cudaSolver->boundaryConditions_cuda[i] = 0;
 	}
 
+	/*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++)
+	{
+		printf("%d\n",j);
+		cudaSolver->unusedCell_cuda[ j] = 1;
+	}*/
+	printf("GPU memory initialized.\n");
+
 
 	//cudaSolver->work_u_cuda[50] = 32.153438;
 ////
-- 
GitLab