diff --git a/examples/hamilton-jacobi-parallel/main.h b/examples/hamilton-jacobi-parallel/main.h
index 83ed80e05ca99240b6404b5babf2b7e912c03331..8fe0efdc6c101e1f80fd853bb4a05c49b104e006 100644
--- a/examples/hamilton-jacobi-parallel/main.h
+++ b/examples/hamilton-jacobi-parallel/main.h
@@ -21,11 +21,15 @@
 #include <operators/godunov-eikonal/parallelGodunovEikonal.h>
 #include <mesh/tnlGrid.h>
 #include <core/tnlDevice.h>
+#include <time.h>
 
 typedef MainBuildConfig BuildConfig;
 
 int main( int argc, char* argv[] )
 {
+	time_t start;
+	time_t stop;
+	time(&start);
 	tnlParameterContainer parameters;
 	tnlConfigDescription configDescription;
 	parallelEikonalConfig< BuildConfig >::configSetup( configDescription );
@@ -78,6 +82,9 @@ int main( int argc, char* argv[] )
 	}
 //	}
+	time(&stop);
+	cout << endl;
+	cout << "Running time was: " << difftime(stop,start) << endl;
 
 	return EXIT_SUCCESS;
 }
 
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
index fad69121a0ebab30ddbfdc77eeec94b3c27b2d22..0d6ad90fcc896ea7e8976633aa8138483f8dbc16 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver.h
@@ -157,7 +157,7 @@ template <typename SchemeHost, typename SchemeDevice, typename Device>
 __global__ void initRunCUDA(tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* caller);
 
 template <typename SchemeHost, typename SchemeDevice, typename Device>
-__global__ void initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, bool * ptr2);
+__global__ void initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr, bool * ptr2, int* ptr3);
 
 template <typename SchemeHost, typename SchemeDevice, typename Device>
 __global__ void synchronizeCUDA(tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver);
diff --git a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
index 049f712957a1c0c6ab4feebe05f0ddaf3c47a177..1f4a001466147d925d90236ffd2508405037e37f 100644
--- a/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
+++ b/examples/hamilton-jacobi-parallel/tnlParallelEikonalSolver_impl.h
@@ -123,8 +123,15 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	//double* tmpw;
 	cudaMalloc(&(this->tmpw), this->work_u.getSize()*sizeof(double));
 	cudaMalloc(&(this->runcuda), sizeof(bool));
-	initCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda));
 	cudaDeviceSynchronize();
+	checkCudaDevice;
+	int* tmpUC;
+	cudaMalloc(&(tmpUC), this->work_u.getSize()*sizeof(int));
+	cudaMemcpy(tmpUC, this->unusedCell.getData(), this->unusedCell.getSize()*sizeof(int), cudaMemcpyHostToDevice);
+
+	initCUDA<SchemeTypeHost, SchemeTypeDevice, DeviceType><<<1,1>>>(this->cudaSolver, (this->tmpw), (this->runcuda),tmpUC);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
 	//cout << "s " << endl;
 	//cudaMalloc(&(cudaSolver->work_u_cuda), this->work_u.getSize()*sizeof(double));
 	double* tmpu = NULL;
@@ -132,6 +139,8 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	cudaMemcpy(&tmpu, tmpdev,sizeof(double*), cudaMemcpyDeviceToHost);
 	//printf("%p %p \n",tmpu,tmpw);
 	cudaMemcpy((this->tmpw), this->work_u.getData(), this->work_u.getSize()*sizeof(double), cudaMemcpyHostToDevice);
+	cudaDeviceSynchronize();
+	checkCudaDevice;
 	//cout << "s "<< endl;
 }
 
@@ -171,6 +180,8 @@ bool tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::in
 	else if(this->device == tnlCudaDevice)
 	{
 //		cout << "pre 1 kernel" << endl;
+		cudaDeviceSynchronize();
+		checkCudaDevice;
 		dim3 threadsPerBlock(this->n, this->n);
 		dim3 numBlocks(this->gridCols,this->gridRows);
 		cudaDeviceSynchronize();
@@ -1362,7 +1373,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 
 template< typename SchemeHost, typename SchemeDevice, typename Device>
 __global__
-void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , bool* ptr2)
+void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::*/initCUDA( tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int >* cudaSolver, double* ptr , bool* ptr2, int* ptr3)
 {
 	//cout << "Initializating solver..." << endl;
 	//const tnlString& meshLocation = parameters.getParameter <tnlString>("mesh");
@@ -1401,7 +1412,7 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 //	this->gridCols_cuda = gridCols;
 
 	cudaSolver->work_u_cuda = ptr;//(double*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(double));
-	cudaSolver->unusedCell_cuda = (int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
+	cudaSolver->unusedCell_cuda = ptr3;//(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*cudaSolver->n*cudaSolver->n*sizeof(int));
 	cudaSolver->subgridValues_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
 	cudaSolver->boundaryConditions_cuda =(int*)malloc(cudaSolver->gridCols*cudaSolver->gridRows*sizeof(int));
 	cudaSolver->runcuda = ptr2;//(bool*)malloc(sizeof(bool));
@@ -1413,12 +1424,17 @@ void /*tnlParallelEikonalSolver<SchemeHost, SchemeDevice, Device, double, int>::
 	for(int i = 0; i < cudaSolver->gridCols*cudaSolver->gridRows; i++)
 	{
-		for(int j = 0; j < cudaSolver->n*cudaSolver->n; j++)
-			cudaSolver->unusedCell_cuda[i*cudaSolver->n*cudaSolver->n + j] = 1;
 		cudaSolver->subgridValues_cuda[i] = INT_MAX;
 		cudaSolver->boundaryConditions_cuda[i] = 0;
 	}
+	/*for(long int j = 0; j < cudaSolver->n*cudaSolver->n*cudaSolver->gridCols*cudaSolver->gridRows; j++)
+	{
+		printf("%d\n",j);
+		cudaSolver->unusedCell_cuda[ j] = 1;
+	}*/
+	printf("GPU memory initialized.\n");
+	//cudaSolver->work_u_cuda[50] = 32.153438;
 	////
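
Note (illustration, not part of the patch): the hunks above replace the in-kernel malloc() of unusedCell_cuda with a host-side cudaMalloc/cudaMemcpy whose raw device pointer is passed into the one-thread initCUDA kernel, which only stores it. Below is a minimal, self-contained sketch of that pattern under the same assumptions; the names Solver, initSolver, devSolver, devU, devUnused and the size N are hypothetical and exist only for this example.

// Sketch of the host-allocates / kernel-wires-up pattern used by the patch.
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>

struct Solver
{
   double* work_u;      // device buffer received from the host
   int*    unusedCell;  // device buffer received from the host
};

// Launched as <<<1,1>>>: only stores the pre-allocated device pointers,
// instead of calling malloc() and filling the arrays inside the kernel.
__global__ void initSolver( Solver* solver, double* workPtr, int* unusedPtr )
{
   solver->work_u     = workPtr;
   solver->unusedCell = unusedPtr;
   printf( "GPU memory initialized.\n" );
}

int main()
{
   const int N = 16;
   std::vector< double > hostU( N, 0.0 );
   std::vector< int >    hostUnused( N, 1 );

   Solver* devSolver = NULL;
   double* devU = NULL;
   int*    devUnused = NULL;
   cudaMalloc( ( void** ) &devSolver, sizeof( Solver ) );
   cudaMalloc( ( void** ) &devU, N * sizeof( double ) );
   cudaMalloc( ( void** ) &devUnused, N * sizeof( int ) );

   // Host-side copies replace the per-cell initialization loop that the
   // patch removes from the kernel.
   cudaMemcpy( devU, hostU.data(), N * sizeof( double ), cudaMemcpyHostToDevice );
   cudaMemcpy( devUnused, hostUnused.data(), N * sizeof( int ), cudaMemcpyHostToDevice );

   initSolver<<< 1, 1 >>>( devSolver, devU, devUnused );
   cudaDeviceSynchronize();
   printf( "init kernel status: %s\n", cudaGetErrorString( cudaGetLastError() ) );

   cudaFree( devU );
   cudaFree( devUnused );
   cudaFree( devSolver );
   return 0;
}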