Commit 23d906d6 authored by Vít Hanousek

Add a simple CUDA-aware MPI copy sample program to test the build system.

parent 63c9f306
@@ -92,15 +92,19 @@ endif()
if( ${CXX_COMPILER_NAME} STREQUAL "mpic++" )
message( "MPI compiler detected." )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_MPI" )
set( CUDA_HOST_COMPILER "mpic++" )
endif()
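# Note: setting CUDA_HOST_COMPILER to mpic++ makes nvcc invoke the MPI compiler
# wrapper as its host compiler, so .cu sources (such as the mpi-gpu sample added
# in this commit) can include <mpi.h> and link against MPI without listing the
# include paths and libraries explicitly.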
####
# Check for MPI
# Check for MPI -- not working
#
#find_package( MPI )
#if( MPI_CXX_FOUND )
# set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_MPI" )
# message( "MPI headers found -- ${MPI_CXX_INCLUDE_PATH}")
# set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_MPI" )
# message( "MPI headers found -- ${MPI_CXX_INCLUDE_PATH}")
# message( "MPI link flags -- ${MPI_CXX_LINK_FLAGS}")
# message( "MPI libraries-- ${MPI_CXX_LIBRARIES}")
#endif()
#####
@@ -28,3 +28,8 @@
#TARGET_COMPILE_DEFINITIONS( tnlMeshFuncttionEvaluateTestXY PUBLIC "-DDIMENSION=3" )
#TARGET_COMPILE_DEFINITIONS( tnlMeshFuncttionEvaluateTestXY PUBLIC "-DXDISTR -DYDISTR" )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE( mpi-gpu-test ${headers} mpi-gpu.cu )
#TARGET_LINK_LIBRARIES( mpi-gpu-test ${CPPUNIT_LIBRARIES}
# tnl )
ENDIF( BUILD_CUDA )
#include <iostream>
using namespace std;
#if defined(HAVE_MPI) && defined(HAVE_CUDA)
#include <cuda_runtime.h>
#include <mpi.h>
__global__ void SetKernel(float *deviceData, float value)
{
// Just a dummy kernel
const int idx = blockIdx.x * blockDim.x + threadIdx.x;
deviceData[idx] = value;
}
double sum(float * data, int count)
{
double sum=0;
for(int i=0;i<count;i++)
sum+=data[i];
return sum;
}
int main(int argc, char **argv)
{
MPI_Init(&argc, &argv);
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int blockSize = 256; // number of threads per block
int gridSize = 1000; // number of blocks in the grid -- must not exceed maxGridSize reported by cudaGetDeviceProperties
int dataCount=blockSize*gridSize;
float * deviceData=NULL;
cudaMalloc((void **)&deviceData, dataCount * sizeof(float));
if(rank==0)
{
cout << rank<<": "<<"Setting GPU-allocated array to 1" << endl;
SetKernel<<< gridSize,blockSize >>>(deviceData,1.0f);
cout << rank<<": "<<" Sending GPU data " <<endl;
// CUDA-aware MPI: the device pointer is handed directly to MPI_Send,
// no staging copy through host memory is needed.
MPI_Send((void*)deviceData, dataCount, MPI_FLOAT, 1, 1, MPI_COMM_WORLD);
}
if(rank==1)
{
cout << rank<<": "<<" Receiving GPU data " <<endl;
// CUDA-aware MPI: receive directly into device memory.
MPI_Recv((void*) deviceData, dataCount, MPI_FLOAT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
float *data = new float[dataCount];
cout << rank<<": "<<" Copying data from GPU to CPU " <<endl;
cudaMemcpy( (void*) data, (void*)deviceData, dataCount*sizeof(float), cudaMemcpyDeviceToHost);
cout << rank<<": "<<" Computing sum on CPU " <<endl;
cout << rank<<": "<< "sum:" << sum(data,dataCount) << endl;
delete [] data;
}
cudaFree(deviceData);
MPI_Finalize();
return 0;
}
#else
int main(void)
{
cout << "CUDA or MPI missing...." <<endl;
}
#endif
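The sample expects exactly two MPI ranks: rank 0 fills a device buffer and sends it, rank 1 receives it into its own device buffer and checks the sum on the CPU. It relies on a CUDA-aware MPI implementation; assuming such a build (e.g. an MPI library configured with CUDA support), it would typically be launched as mpirun -np 2 ./mpi-gpu-test.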