diff --git a/CMakeLists.txt b/CMakeLists.txt
index f32e8fbb152a61b3f75e60ce8b35a82aa5f515fd..b2032edc92135c4aad45ef18840f3f6ee3c5d416 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -188,7 +188,7 @@ if( WITH_CUDA STREQUAL "yes" )
         if( NOT WITH_CUBLAS STREQUAL "no" )
             find_path( CUBLAS_INCLUDE_DIR cublas_v2.h
                        /usr/local/cuda/include
-                       ${CUDA_INCLUDE_DIR}
+                       ${CUDA_INCLUDE_DIRS}
                        DOC "CUBLAS headers." )
             if( ${CUBLAS_INCLUDE_DIR} STREQUAL "CUBLAS_INCLUDE_DIR-NOTFOUND" )
                 message( "CUBLAS not found." )
@@ -219,7 +219,7 @@ if( WITH_CUDA STREQUAL "yes" )
         if( NOT WITH_CUSPARSE STREQUAL "no" )
            find_path( CUSPARSE_INCLUDE_DIR cusparse.h
                       /usr/local/cuda/include                   
-                      ${CUDA_INCLUDE_DIR}  
+                      ${CUDA_INCLUDE_DIRS}  
                       DOC "CUSPARSE headers." )
            if( ${CUSPARSE_INCLUDE_DIR} STREQUAL "CUSPARSE_INCLUDE_DIR-NOTFOUND" )
                message( "CUSPARSE not found." )
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO.h b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO.h
index 0a54b312c2ed19761c442b60ef9c8460af810a63..e6b0e84a3bfcba2875e0af90fc97a524c5de45c3 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGridIO.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGridIO.h
@@ -18,11 +18,15 @@
 
 #include <iostream>
 
+#ifdef MPIIO
+#include <TNL/Communicators/MpiCommunicator.h>
+#endif
+
 namespace TNL {
 namespace Meshes {   
 namespace DistributedMeshes {
 
-enum DistrGridIOTypes { Dummy = 0 , LocalCopy = 1, MPIIO=2 };
+enum DistrGridIOTypes { Dummy = 0 , LocalCopy = 1, MpiIO=2 };
     
 template<typename MeshFunctionType,
          DistrGridIOTypes type = LocalCopy> 
@@ -33,12 +37,12 @@ class DistributedGridIO
 template<typename MeshFunctionType> 
 class DistributedGridIO<MeshFunctionType,Dummy>
 {
-    bool save(File &file,MeshFunctionType meshFunction)
+    bool save(const String& fileName, MeshFunctionType &meshFunction)
     {
         return true;
     };
             
-    bool load(File &file,MeshFunctionType &meshFunction) 
+    bool load(const String& fileName, MeshFunctionType &meshFunction) 
     {
         return true;
     };
@@ -61,13 +65,13 @@ class DistributedGridIO<MeshFunctionType,LocalCopy>
     typedef typename MeshFunctionType::VectorType VectorType;
     //typedef DistributedGrid< MeshType,MeshFunctionType::getMeshDimension()> DistributedGridType;
     
-    static bool save(File &file,File &meshOutputFile, MeshFunctionType &meshFunction)
+    static bool save(const String& fileName, MeshFunctionType &meshFunction)
     {
         auto *distrGrid=meshFunction.getMesh().GetDistMesh();
         
         if(distrGrid==NULL) //not distributed
         {
-            return meshFunction.save(file);
+            return meshFunction.save(fileName);
         }
 
         MeshType mesh=meshFunction.getMesh();
@@ -83,7 +87,10 @@ class DistributedGridIO<MeshFunctionType,LocalCopy>
         newMesh->setSpaceSteps(spaceSteps);
         newMesh->setOrigin(origin+TNL::Containers::tnlDotProduct(spaceSteps,localBegin));
         
-        newMesh->save( meshOutputFile );
+        File meshFile;
+        meshFile.open( fileName+String("-mesh-")+distrGrid->printProcessCoords(),IOMode::write);
+        newMesh->save( meshFile );
+        meshFile.close();
 
         VectorType newDof(newMesh-> template getEntitiesCount< typename MeshType::Cell >());
 
@@ -94,16 +101,22 @@ class DistributedGridIO<MeshFunctionType,LocalCopy>
         zeroCoord.setValue(0);
 
         CopyEntitiesHelper<MeshFunctionType>::Copy(meshFunction,newMeshFunction,localBegin,zeroCoord,localSize);
-        return newMeshFunction.save(file);
+
+        File file;
+        file.open( fileName+String("-")+distrGrid->printProcessCoords(), IOMode::write );
+        bool ret=newMeshFunction.save(file);
+        file.close();
+
+        return ret;
         
     };
             
-    static bool load(File &file,MeshFunctionType &meshFunction) 
+    static bool load(const String& fileName,MeshFunctionType &meshFunction) 
     {
         auto *distrGrid=meshFunction.getMesh().GetDistMesh();
         if(distrGrid==NULL) //not distributed
         {
-            return meshFunction.boundLoad(file);
+            return meshFunction.boundLoad(fileName);
         }
 
         MeshType mesh=meshFunction.getMesh();
@@ -126,7 +139,10 @@ class DistributedGridIO<MeshFunctionType,LocalCopy>
         CoordinatesType zeroCoord;
         zeroCoord.setValue(0);        
 
+        File file;
+        file.open( fileName+String("-")+distrGrid->printProcessCoords(), IOMode::read );
         bool result=newMeshFunction.boundLoad(file);
+        file.close();
         CopyEntitiesHelper<MeshFunctionType>::Copy(newMeshFunction,meshFunction,zeroCoord,localBegin,localSize);
         
         return result;
@@ -136,10 +152,13 @@ class DistributedGridIO<MeshFunctionType,LocalCopy>
 
 /*
  * Save distributed data into single file without overlaps using MPIIO and MPI datatypes, 
- * EXPLOSIVE: works with only Grids
+ * EXPLOSIVE: works only with Grids and MPI
+ * BAD IMPLEMENTATION: creates MPI types on every save! -- I don't want to contaminate more places with MPI..
  */
-/*template<typename MeshFunctionType> 
-class DistributedGridIO<MeshFunctionType,MPIIO>
+
+#ifdef MPIIO
+template<typename MeshFunctionType> 
+class DistributedGridIO<MeshFunctionType,MpiIO>
 {
 
     public:
@@ -150,39 +169,231 @@ class DistributedGridIO<MeshFunctionType,MPIIO>
     typedef typename MeshFunctionType::VectorType VectorType;
     //typedef DistributedGrid< MeshType,MeshFunctionType::getMeshDimension()> DistributedGridType;
     
-    static bool save(File &file,File &meshOutputFile, MeshFunctionType &meshFunction)
+    static bool save(const String& fileName, MeshFunctionType &meshFunction)
     {
         auto *distrGrid=meshFunction.getMesh().GetDistMesh();
         
         if(distrGrid==NULL) //not distributed
         {
-            return meshFunction.save(file);
+            return meshFunction.save(fileName);
         }
 
-        int dim=distrGrid.getMeshDimension();
+       MPI_Datatype ftype;
+       MPI_Datatype atype;
+       int dataCount=CreateDataTypes(distrGrid,&ftype,&atype);
 
-        MeshType mesh=meshFunction.getMesh();
-        
-        fgsize[2],flsize[2],fstarts[2];
+       double * data=meshFunction.getData().getData();//TYP
+
+       //write 
+       MPI_File file;
+       MPI_File_open(MPI_COMM_WORLD,fileName.getString(),
+        	   MPI_MODE_CREATE|MPI_MODE_WRONLY,
+               MPI_INFO_NULL, &file);
 
-        return newMeshFunction.save(file);
+       int headerSize=0;
+
+       if(Communicators::MpiCommunicator::GetRank()==0)
+       {
+            headerSize=writeMeshFunctionHeader(file,meshFunction,dataCount);
+       }
+       MPI_Bcast(&headerSize, 1, MPI_INT,0, MPI_COMM_WORLD);
+
+       MPI_File_set_view(file,headerSize,MPI_DOUBLE,ftype,"native",MPI_INFO_NULL);//TYP
+       MPI_Status wstatus;
+
+       MPI_File_write(file,data,1,atype,&wstatus);
+
+       MPI_File_close(&file);
+
+       MPI_Type_free(&atype);
+       MPI_Type_free(&ftype);
+
+       return true;
+    };
+
+    template<typename DitsributedGridType>
+    static int CreateDataTypes(DitsributedGridType *distrGrid,MPI_Datatype *ftype,MPI_Datatype *atype)
+    {
+        int dim=distrGrid->getMeshDimension();
+
+        int fstarts[dim];
+        int flsize[dim];
+        int fgsize[dim];
         
+        hackArray(dim,fstarts,distrGrid->getGlobalBegin().getData());
+        hackArray(dim,flsize,distrGrid->getLocalSize().getData());
+        hackArray(dim,fgsize,distrGrid->getGlobalSize().getData());
+
+        MPI_Type_create_subarray(dim,
+            fgsize,flsize,fstarts,
+            MPI_ORDER_C,MPI_DOUBLE,ftype); //TYP
+        MPI_Type_commit(ftype);
+
+       int agsize[dim];
+       int alsize[dim];
+       int astarts[dim]; 
+
+       hackArray(dim,astarts,distrGrid->getLocalBegin().getData());
+       hackArray(dim,alsize,distrGrid->getLocalSize().getData());
+       hackArray(dim,agsize,distrGrid->getLocalGridSize().getData());
+
+       MPI_Type_create_subarray(dim,
+            agsize,alsize,astarts,
+            MPI_ORDER_C,MPI_DOUBLE,atype); //TYP
+       MPI_Type_commit(atype);
+
+        int dataCount=1;
+        for(int i=0;i<dim;i++)
+            dataCount*=fgsize[i];
+
+        return dataCount;
+
+    }
+
+    template<typename Index>
+    static void hackArray(int dim, int* out, Index* in)
+    {
+        if(dim==1)
+        {
+            out[0]=in[0];
+        }
+
+        if(dim==2)
+        {
+           out[1]=in[0];
+           out[0]=in[1];
+        }
+
+        if(dim==3)
+        {
+           out[0]=in[2];
+           out[1]=in[1];
+           out[2]=in[0];
+        }
+    }
+
+    static unsigned int writeMeshFunctionHeader(MPI_File file,MeshFunctionType &meshFunction, int length )
+    {
+
+        unsigned int size=0;
+        int count;
+        MPI_Status wstatus;
+
+        //Magic
+        MPI_File_write(file,"TNLMN",5,MPI_CHAR,&wstatus);
+        MPI_Get_count(&wstatus,MPI_CHAR,&count);
+        size+=count*sizeof(char);
+
+        //Meshfunction type
+        String meshFunctionSerializationType=meshFunction.getSerializationTypeVirtual();
+        int meshFunctionSerializationTypeLength=meshFunctionSerializationType.getLength();
+        MPI_File_write(file,&meshFunctionSerializationTypeLength,1,MPI_INT,&wstatus);
+        MPI_Get_count(&wstatus,MPI_INT,&count);
+        size+=count*sizeof(int);
+        MPI_File_write(file,meshFunctionSerializationType.getString(),meshFunctionSerializationType.getLength(),MPI_CHAR,&wstatus);
+        MPI_Get_count(&wstatus,MPI_CHAR,&count);
+        size+=count*sizeof(char);
+
+        //Magic
+        MPI_File_write(file,"TNLMN",5,MPI_CHAR,&wstatus);
+        MPI_Get_count(&wstatus,MPI_CHAR,&count);
+        size+=count*sizeof(char);
+        //Vector Type
+        String dataSerializationType=meshFunction.getData().getSerializationTypeVirtual();
+        int dataSerializationTypeLength=dataSerializationType.getLength();
+        MPI_File_write(file,&dataSerializationTypeLength,1,MPI_INT,&wstatus);
+        MPI_Get_count(&wstatus,MPI_INT,&count);
+        size+=count*sizeof(int);
+        MPI_File_write(file,dataSerializationType.getString(),dataSerializationType.getLength(),MPI_CHAR,&wstatus);
+        MPI_Get_count(&wstatus,MPI_CHAR,&count);
+        size+=count*sizeof(char);
+        //Data count
+        MPI_File_write(file,&(length),1,MPI_INT,&wstatus);
+        MPI_Get_count(&wstatus,MPI_INT,&count);
+        size+=count*sizeof(int);
+
+        return size;
     };
             
-    static bool load(File &file,MeshFunctionType &meshFunction) 
+    /* Funky bomb - no checks - only dirty load */
+    static bool load(const String& fileName,MeshFunctionType &meshFunction) 
     {
         auto *distrGrid=meshFunction.getMesh().GetDistMesh();
         if(distrGrid==NULL) //not distributed
         {
-            return meshFunction.boundLoad(file);
+            return meshFunction.boundLoad(fileName);
         }
 
-        MeshType mesh=meshFunction.getMesh();
+       MPI_Datatype ftype;
+       MPI_Datatype atype;
+       int dataCount=CreateDataTypes(distrGrid,&ftype,&atype);
+
+       double * data=meshFunction.getData().getData();//TYP
+
+       //read
+       MPI_File file;
+       MPI_File_open(MPI_COMM_WORLD,fileName.getString(),
+        	   MPI_MODE_RDONLY,
+               MPI_INFO_NULL, &file);
+       
+       int headerSize=0;
+
+       if(Communicators::MpiCommunicator::GetRank()==0)
+       {
+            headerSize=readMeshFunctionHeader(file,meshFunction,dataCount);
+       }
+       MPI_Bcast(&headerSize, 1, MPI_INT,0, MPI_COMM_WORLD);
+       
+       if(headerSize<0)
+            return false;
+
+       MPI_File_set_view(file,headerSize,MPI_DOUBLE,ftype,"native",MPI_INFO_NULL);//TYP
+       MPI_Status wstatus;
+       MPI_File_read(file,(void*)data,1,atype,&wstatus);
+       MPI_File_close(&file);
         
+       MPI_Type_free(&atype);
+       MPI_Type_free(&ftype);
+
+       return true;
+    };
+
+    //well, it could check something... but it checks nothing
+    static int readMeshFunctionHeader(MPI_File file,MeshFunctionType &meshFunction, int length)
+    {
+        MPI_Status rstatus;
+        char buffer[255];
+        int size=0;
+        int count=0;
+
+        MPI_File_read(file, (void *)buffer,5, MPI_CHAR, &rstatus);//MAGIC
+        size+=5*sizeof(char);
+        MPI_File_read(file, (void *)&count,1, MPI_INT, &rstatus);//SIZE OF DATATYPE
+        size+=1*sizeof(int);
+        MPI_File_read(file, (void *)&buffer,count, MPI_CHAR, &rstatus);//DATATYPE
+        size+=count*sizeof(char);
+
+        MPI_File_read(file, (void *)buffer,5, MPI_CHAR, &rstatus);//MAGIC
+        size+=5*sizeof(char);
+        MPI_File_read(file, (void *)&count,1, MPI_INT, &rstatus);//SIZE OF DATATYPE
+        size+=1*sizeof(int);
+        MPI_File_read(file, (void *)&buffer,count, MPI_CHAR, &rstatus);//DATATYPE
+        size+=count*sizeof(char);
+        MPI_File_read(file, (void *)&count,1, MPI_INT, &rstatus);//DATACOUNT
+        size+=1*sizeof(int);
         
+        if(count!=length)
+        {
+            std::cerr<<"Chyba načítání MeshFunction, délka dat v souboru neodpovídá očekávané délce" << std::endl;
+            size=-1;
+        }
+
+        return size;
     };
     
-};*/
+};
+
+#endif
 
 
 }
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_1D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_1D.h
index 630a6ecda11a5b680f678f8cd1a68c7e6cadc7c4..874955452346be5429fa5948d8fb209d5357051c 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_1D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_1D.h
@@ -36,6 +36,8 @@ class DistributedMesh<Grid< 1, RealType, Device, Index >>
         CoordinatesType localsize;
         CoordinatesType localgridsize;
         CoordinatesType overlap;
+        CoordinatesType globalsize;
+        CoordinatesType globalbegin;
         PointType spaceSteps;
         
         
@@ -95,6 +97,8 @@ class DistributedMesh<Grid< 1, RealType, Device, Index >>
                localorigin=globalGrid.getOrigin();
                localsize=globalGrid.getDimensions();
                localgridsize=globalGrid.getDimensions();
+               globalsize=globalGrid.getDimensions();
+               globalbegin=CoordinatesType(0);
                
                localbegin=CoordinatesType(0);
                return;
@@ -106,21 +110,28 @@ class DistributedMesh<Grid< 1, RealType, Device, Index >>
                    left=rank-1;
                if(rank!=nproc-1)
                    right=rank+1;
-                  
+
+               globalsize=globalGrid.getDimensions();                 
+
                //compute local mesh size               
                numberoflarger=globalGrid.getDimensions().x()%nproc;
-                 
+
                localsize.x()=(globalGrid.getDimensions().x()/nproc);               
                if(numberoflarger>rank)
                     localsize.x()+=1;                      
                                   
                if(numberoflarger>rank)
+               {
+                   globalbegin.x()=rank*localsize.x();
                    localorigin.x()=globalGrid.getOrigin().x()
-                                +(rank*localsize.x()-overlap.x())*globalGrid.getSpaceSteps().x();
+                                +(globalbegin.x()-overlap.x())*globalGrid.getSpaceSteps().x();
+               }
                else
-                   localorigin.x()=globalGrid.getOrigin().x()
-                                +(numberoflarger*(localsize.x()+1)+(rank-numberoflarger)*localsize.x()-overlap.x())
-                                *globalGrid.getSpaceSteps().x();
+               {
+                   globalbegin.x()=numberoflarger*(localsize.x()+1)+(rank-numberoflarger)*localsize.x();
+                   localorigin.x()=(globalGrid.getOrigin().x()-overlap.x())
+                                +globalbegin.x()*globalGrid.getSpaceSteps().x();
+               }
               
               localbegin=overlap;
                
@@ -151,14 +162,14 @@ class DistributedMesh<Grid< 1, RealType, Device, Index >>
            grid.SetDistMesh(this);
        };
        
-       void printcoords(std::ostream& out)
+       String printProcessCoords()
        {
-           out<<rank<<":";
+           return convertToString(rank);
        };
 
-       void printdistr(std::ostream& out)
+       String printProcessDistr()
        {
-           out<<"("<<nproc<<"):";
+           return convertToString(nproc);
        };       
 
        bool IsDistributed(void)
@@ -183,17 +194,31 @@ class DistributedMesh<Grid< 1, RealType, Device, Index >>
            return this->overlap;
        };
 
+       //number of elements of the local subdomain WITHOUT overlap
        CoordinatesType getLocalSize()
        {
            return this->localsize;
        }
 
+       //number of elements of global grid
+       CoordinatesType getGlobalSize()
+       {
+           return this->globalsize;
+       }
+
+       //coordinates of the beginning of the local subdomain (without overlaps) in the global grid
+       CoordinatesType getGlobalBegin()
+       {
+           return this->globalbegin;
+       }
+
+       //number of elements of the local subdomain WITH overlap
        CoordinatesType getLocalGridSize()
        {
            return this->localgridsize;
        }
        
-              
+       //coordinates of the beginning of the local subdomain (without overlaps) in the local grid       
        CoordinatesType getLocalBegin()
        {
            return this->localbegin;
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_2D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_2D.h
index ac43b8fcd8eef9461995edbc0c2c810bdccfe94a..0c0dcf845c899fa744f40678883f6242184548a8 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_2D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_2D.h
@@ -38,6 +38,8 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
         CoordinatesType localbegin;//souradnice zacatku zpracovavane vypoctove oblasi
         CoordinatesType localgridsize;//velikost lokálního gridu včetně překryvů
         CoordinatesType overlap;
+        CoordinatesType globalsize;//velikost celé sítě
+        CoordinatesType globalbegin;
         
         
         IndexType Dimensions;        
@@ -99,6 +101,8 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
                localorigin=globalGrid.getOrigin();
                localgridsize=globalGrid.getDimensions();
                localsize=globalGrid.getDimensions();
+               globalsize=globalGrid.getDimensions();
+               globalbegin=CoordinatesType(0);
                localbegin.x()=0;
                localbegin.y()=0;
                
@@ -119,10 +123,12 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
                   procsdistr[1]=0;
                }
                CommunicatorType::DimsCreate(nproc, 2, procsdistr);
+
                myproccoord[0]=rank%procsdistr[0];
                myproccoord[1]=rank/procsdistr[0];        
 
-               //compute local mesh size              
+               //compute local mesh size
+               globalsize=globalGrid.getDimensions();              
                numberoflarger[0]=globalGrid.getDimensions().x()%procsdistr[0];
                numberoflarger[1]=globalGrid.getDimensions().y()%procsdistr[1];
 
@@ -135,20 +141,17 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
                    localsize.y()+=1;
 
                if(numberoflarger[0]>myproccoord[0])
-                   localorigin.x()=globalGrid.getOrigin().x()
-                                +(myproccoord[0]*localsize.x()-overlap.x())*globalGrid.getSpaceSteps().x();
+                   globalbegin.x()=myproccoord[0]*localsize.x();
                else
-                   localorigin.x()=globalGrid.getOrigin().x()
-                                +(numberoflarger[0]*(localsize.x()+1)+(myproccoord[0]-numberoflarger[0])*localsize.x()-overlap.x())
-                                *globalGrid.getSpaceSteps().x();
-               
+                   globalbegin.x()=numberoflarger[0]*(localsize.x()+1)+(myproccoord[0]-numberoflarger[0])*localsize.x();
+
                if(numberoflarger[1]>myproccoord[1])
-                   localorigin.y()=globalGrid.getOrigin().y()
-                                +(myproccoord[1]*localsize.y()-overlap.y())*globalGrid.getSpaceSteps().y();
+                   globalbegin.y()=myproccoord[1]*localsize.y();
+
                else
-                   localorigin.y()=globalGrid.getOrigin().y()
-                                +(numberoflarger[1]*(localsize.y()+1)+(myproccoord[1]-numberoflarger[1])*localsize.y()-overlap.y())
-                                *globalGrid.getSpaceSteps().y();
+                   globalbegin.y()=numberoflarger[1]*(localsize.y()+1)+(myproccoord[1]-numberoflarger[1])*localsize.y();
+
+               localorigin=globalGrid.getOrigin()+TNL::Containers::tnlDotProduct(globalGrid.getSpaceSteps(),globalbegin-overlap);
 
                //nearnodes
                if(myproccoord[0]>0)
@@ -209,15 +212,15 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
            grid.SetDistMesh(this);
        };
        
-       void printcoords(std::ostream& out)
+       String printProcessCoords()
        {
-           out<<"("<<myproccoord[0]<<","<<myproccoord[1]<<"):";
+           return convertToString(myproccoord[0])+String("-")+convertToString(myproccoord[1]);
        };
 
-       void printdistr(std::ostream& out)
+       String printProcessDistr()
        {
-           out<<"("<<procsdistr[0]<<","<<procsdistr[1]<<"):";
-       };
+           return convertToString(procsdistr[0])+String("-")+convertToString(procsdistr[1]);
+       };  
        
        bool IsDistributed(void)
        {
@@ -240,6 +243,18 @@ class DistributedMesh<Grid< 2, RealType, Device, Index >>
            return this->localsize;
        }
 
+       //number of elements of global grid
+       CoordinatesType getGlobalSize()
+       {
+           return this->globalsize;
+       }
+
+       //coordinates of the beginning of the local subdomain (without overlaps) in the global grid
+       CoordinatesType getGlobalBegin()
+       {
+           return this->globalbegin;
+       }
+
        CoordinatesType getLocalGridSize()
        {
            return this->localgridsize;
diff --git a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_3D.h b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_3D.h
index 36488d10e8ef337cba241688dd304b46be57b459..f12950178a7efcbc3017c0a288024096573e1aa9 100644
--- a/src/TNL/Meshes/DistributedMeshes/DistributedGrid_3D.h
+++ b/src/TNL/Meshes/DistributedMeshes/DistributedGrid_3D.h
@@ -46,7 +46,8 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
         CoordinatesType localgridsize;
         CoordinatesType localbegin;
         CoordinatesType overlap;
-        
+        CoordinatesType globalsize;
+        CoordinatesType globalbegin;
         
         IndexType Dimensions;        
         bool isDistributed;
@@ -111,7 +112,9 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
                
                localorigin=globalGrid.getOrigin();
                localsize=globalGrid.getDimensions();
+               globalsize=globalGrid.getDimensions();
                localgridsize=localsize;
+               globalbegin=CoordinatesType(0);
                return;
            }
            else
@@ -135,7 +138,8 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
                myproccoord[1]=(rank%(procsdistr[0]*procsdistr[1]))/procsdistr[0];
                myproccoord[0]=(rank%(procsdistr[0]*procsdistr[1]))%procsdistr[0];
 
-               //compute local mesh size               
+               //compute local mesh size 
+               globalsize=globalGrid.getDimensions();                
                numberoflarger[0]=globalGrid.getDimensions().x()%procsdistr[0];
                numberoflarger[1]=globalGrid.getDimensions().y()%procsdistr[1];
                numberoflarger[2]=globalGrid.getDimensions().z()%procsdistr[2];
@@ -152,28 +156,21 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
                    localsize.z()+=1;
                                   
                if(numberoflarger[0]>myproccoord[0])
-                   localorigin.x()=globalGrid.getOrigin().x()
-                                +(myproccoord[0]*localsize.x()-overlap.x())*globalGrid.getSpaceSteps().x();
+                   globalbegin.x()=myproccoord[0]*localsize.x();
                else
-                   localorigin.x()=globalGrid.getOrigin().x()
-                                +(numberoflarger[0]*(localsize.x()+1)+(myproccoord[0]-numberoflarger[0])*localsize.x()-overlap.x())
-                                *globalGrid.getSpaceSteps().x();
-               
+                   globalbegin.x()=numberoflarger[0]*(localsize.x()+1)+(myproccoord[0]-numberoflarger[0])*localsize.x();
+                 
                if(numberoflarger[1]>myproccoord[1])
-                   localorigin.y()=globalGrid.getOrigin().y()
-                                +(myproccoord[1]*localsize.y()-overlap.y())*globalGrid.getSpaceSteps().y();
+                   globalbegin.y()=myproccoord[1]*localsize.y();
                else
-                   localorigin.y()=globalGrid.getOrigin().y()
-                                +(numberoflarger[1]*(localsize.y()+1)+(myproccoord[1]-numberoflarger[1])*localsize.y()-overlap.y())
-                                *globalGrid.getSpaceSteps().y();
-
+                   globalbegin.y()=numberoflarger[1]*(localsize.y()+1)+(myproccoord[1]-numberoflarger[1])*localsize.y();
+ 
                if(numberoflarger[2]>myproccoord[2])
-                   localorigin.z()=globalGrid.getOrigin().z()
-                                +(myproccoord[2]*localsize.z()-overlap.z())*globalGrid.getSpaceSteps().z();
+                   globalbegin.z()=myproccoord[2]*localsize.z();
                else
-                   localorigin.z()=globalGrid.getOrigin().z()
-                                +(numberoflarger[2]*(localsize.z()+1)+(myproccoord[2]-numberoflarger[2])*localsize.z()-overlap.z())
-                                *globalGrid.getSpaceSteps().z();
+                   globalbegin.z()=numberoflarger[2]*(localsize.z()+1)+(myproccoord[2]-numberoflarger[2])*localsize.z();
+
+               localorigin=globalGrid.getOrigin()+TNL::Containers::tnlDotProduct(globalGrid.getSpaceSteps(),globalbegin-overlap);
                
                //nearnodes
                //X Y Z
@@ -284,16 +281,16 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
            grid.SetDistMesh(this);
        };
        
-       void printcoords(std::ostream& out )
+       String printProcessCoords()
        {
-           out<<"("<<myproccoord[0]<<","<<myproccoord[1]<<","<<myproccoord[2]<<"):";
+           return convertToString(myproccoord[0])+String("-")+convertToString(myproccoord[1])+String("-")+convertToString(myproccoord[2]);
        };
 
-       void printdistr(std::ostream& out)
+       String printProcessDistr()
        {
-           out<<"("<<procsdistr[0]<<","<<procsdistr[1]<<","<<procsdistr[2]<<"):";
-       };
-       
+           return convertToString(procsdistr[0])+String("-")+convertToString(procsdistr[1])+String("-")+convertToString(procsdistr[2]);
+       };  
+
        bool IsDistributed(void)
        {
            return this->isDistributed;
@@ -324,6 +321,18 @@ class DistributedMesh<Grid< 3, RealType, Device, Index >>
        {
            return this->localbegin;
        }
+
+       //number of elements of global grid
+       CoordinatesType getGlobalSize()
+       {
+           return this->globalsize;
+       }
+
+       //coordinates of the beginning of the local subdomain (without overlaps) in the global grid
+       CoordinatesType getGlobalBegin()
+       {
+           return this->globalbegin;
+       }
        
        private:
            
diff --git a/src/TNL/Meshes/GridDetails/Grid2D_impl.h b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
index 3631661bbe874176ab9704654f8895568a709f5b..91ccf4e1306a6d35b47b6c919833c6eaa6e455f8 100644
--- a/src/TNL/Meshes/GridDetails/Grid2D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid2D_impl.h
@@ -32,7 +32,8 @@ Grid< 2, Real, Device, Index > :: Grid()
   numberOfNxFaces( 0 ),
   numberOfNyFaces( 0 ),
   numberOfFaces( 0 ),
-  numberOfVertices( 0 )
+  numberOfVertices( 0 ),
+  distGrid(nullptr)
 {
 }
 
diff --git a/src/TNL/Meshes/GridDetails/Grid3D_impl.h b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
index e46310b2852a5348451e5ee601e05caa73b56426..fa1c32dd2ba9fab60f1e9fffc9ce1d592885bfb6 100644
--- a/src/TNL/Meshes/GridDetails/Grid3D_impl.h
+++ b/src/TNL/Meshes/GridDetails/Grid3D_impl.h
@@ -39,7 +39,8 @@ Grid< 3, Real, Device, Index > :: Grid()
   numberOfDzEdges( 0 ),
   numberOfDxAndDyEdges( 0 ),
   numberOfEdges( 0 ),
-  numberOfVertices( 0 )
+  numberOfVertices( 0 ),
+  distGrid(nullptr)
 {
 }
 
diff --git a/src/Tools/tnl-init.h b/src/Tools/tnl-init.h
index ab508a5832491c05c223f4ee759efdd4f639b5fb..67fbba0a5ec58093dcad37d2805e946cbd95446c 100644
--- a/src/Tools/tnl-init.h
+++ b/src/Tools/tnl-init.h
@@ -117,13 +117,7 @@ bool renderFunction( const Config::ParameterContainer& parameters )
 
       if(CommunicatorType::isDistributed())
       {
-        File file;
-        file.open( outputFile+convertToString(CommunicatorType::GetRank()), IOMode::write );
-        File meshFile;
-        meshFile.open(convertToString(CommunicatorType::GetRank())+String("mesh.tnl"),IOMode::write);
-        Meshes::DistributedMeshes::DistributedGridIO<MeshFunctionType> ::save(file,meshFile, *meshFunction );
-        meshFile.close();
-        file.close();
+        Meshes::DistributedMeshes::DistributedGridIO<MeshFunctionType> ::save(outputFile, *meshFunction );
       }
       else
       {
diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt
index cdf9f59403c9cf2b6b0388db2d4b27d3f6e1f6d1..21fef2e5607ea0f6c08d53e8f4d966fb4f236f98 100644
--- a/src/UnitTests/CMakeLists.txt
+++ b/src/UnitTests/CMakeLists.txt
@@ -45,6 +45,6 @@ ADD_TEST( FileTest ${EXECUTABLE_OUTPUT_PATH}/FileTest${CMAKE_EXECUTABLE_SUFFIX}
 ADD_TEST( StringTest ${EXECUTABLE_OUTPUT_PATH}/StringTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( ObjectTest ${EXECUTABLE_OUTPUT_PATH}/ObjectTest${CMAKE_EXECUTABLE_SUFFIX} )
 ADD_TEST( UniquePointerTest ${EXECUTABLE_OUTPUT_PATH}/UniquePointerTest${CMAKE_EXECUTABLE_SUFFIX} )
-ADD_TEST( SaveAndLoadMeshfunctionTest ${EXECUTABLE_OUTPUT_PATH}/SaveAndLoadMeshfunctionTest )
+ADD_TEST( SaveAndLoadMeshfunctionTest ${EXECUTABLE_OUTPUT_PATH}/SaveAndLoadMeshfunctionTest${CMAKE_EXECUTABLE_SUFFIX} )
 
 endif( WITH_TESTS STREQUAL "yes" )
diff --git a/src/UnitTests/Mpi/CMakeLists.txt b/src/UnitTests/Mpi/CMakeLists.txt
index 3fee83ec98b46833d44c069d821ea16d12bf72e1..5dca444aa40876bd5df0286b3393b7934168149b 100644
--- a/src/UnitTests/Mpi/CMakeLists.txt
+++ b/src/UnitTests/Mpi/CMakeLists.txt
@@ -28,19 +28,19 @@ ADD_EXECUTABLE( DistributedGridIOTest DistributedGridIOTest.cpp )
    TARGET_LINK_LIBRARIES( DistributedGridIOTest
                               ${GTEST_BOTH_LIBRARIES}
                               tnl )
+
+ADD_TEST( NAME CopyEntitesTest COMMAND ${EXECUTABLE_OUTPUT_PATH}/CopyEntitesTest${CMAKE_EXECUTABLE_SUFFIX} )
    
-SET (mpi_test_parameters_1d -np 4 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_1D")
+SET (mpi_test_parameters_1d -np 4 -H localhost:4 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_1D${CMAKE_EXECUTABLE_SUFFIX}")
 ADD_TEST( NAME DistributedGridTest_1D COMMAND "mpirun" ${mpi_test_parameters_1d})
 
-SET (mpi_test_parameters_2d -np 9 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_2D")
+SET (mpi_test_parameters_2d -np 9 -H localhost:9 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_2D${CMAKE_EXECUTABLE_SUFFIX}")
 ADD_TEST( NAME DistributedGridTest_2D COMMAND "mpirun" ${mpi_test_parameters_2d})
 
-SET (mpi_test_parameters_3d -np 27 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_3D")
+SET (mpi_test_parameters_3d -np 27 -H localhost:27 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridTest_3D${CMAKE_EXECUTABLE_SUFFIX}")
 ADD_TEST( NAME DistributedGridTest_3D COMMAND "mpirun" ${mpi_test_parameters_3d})
 
-ADD_TEST( CopyEntitesTest ${EXECUTABLE_OUTPUT_PATH}/CopyEntitesTest )
-
-SET (mpi_test_parameters_IO -np 4 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridIOTest")
+SET (mpi_test_parameters_IO -np 4 "${EXECUTABLE_OUTPUT_PATH}/DistributedGridIOTest${CMAKE_EXECUTABLE_SUFFIX}")
 ADD_TEST( NAME DistributedGridIOTest COMMAND "mpirun" ${mpi_test_parameters_IO})
 
 IF( BUILD_CUDA )
diff --git a/src/UnitTests/Mpi/DistributedGridIOTest.h b/src/UnitTests/Mpi/DistributedGridIOTest.h
index 25f1d1988c60893e39832405c66cdd52d2deda52..99a725a2f46a6bddc5d8e480c92853da3f112d5a 100644
--- a/src/UnitTests/Mpi/DistributedGridIOTest.h
+++ b/src/UnitTests/Mpi/DistributedGridIOTest.h
@@ -238,16 +238,10 @@ class TestDistributedGridIO{
         meshFunctionptr->bind(gridptr,dof);
             
         linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
-        
-        File file;
-
-        File meshFile;
-        file.open( String( "/tmp/test-file.tnl-" )+convertToString(CommunicatorType::GetRank()), IOMode::write );
-        meshFile.open( String( "/tmp/test-file-mesh.tnl-" )+convertToString(CommunicatorType::GetRank()), IOMode::write );
-        DistributedGridIO<MeshFunctionType> ::save(file,meshFile, *meshFunctionptr );
-        meshFile.close();
+ 
+        String FileName=String("/tmp/test-file.tnl");
+        DistributedGridIO<MeshFunctionType> ::save(FileName, *meshFunctionptr );
 
-        file.close();
 
        //create similar local mesh function and evaluate linear function on it
         PointType localOrigin=parametry.getOrigin(CommunicatorType::GetRank());        
@@ -274,8 +268,8 @@ class TestDistributedGridIO{
 
         loadDof.setValue(-1);
         
-
-        file.open( String( "/tmp/test-file.tnl-" )+convertToString(CommunicatorType::GetRank()), IOMode::read );
+        File file;
+        file.open( FileName+String("-")+distrgrid.printProcessCoords(), IOMode::read );
         loadMeshFunctionptr->boundLoad(file);
         file.close();
 
@@ -292,6 +286,24 @@ class TestDistributedGridIO{
         
         ParameterProvider<dim,Device> parametry;
 
+        //Create distributed grid            
+        PointType globalOrigin;
+        globalOrigin.setValue(-0.5);
+
+        PointType globalProportions;
+        globalProportions.setValue(20);
+
+        MeshType globalGrid;
+        globalGrid.setDimensions(globalProportions);
+        globalGrid.setDomain(globalOrigin,globalProportions);
+        
+        int *distr=parametry.getDistr();
+
+        CoordinatesType overlap;
+        overlap.setValue(1);
+        DistributedGridType distrgrid;
+        distrgrid.template setGlobalGrid<CommunicatorType>(globalGrid,overlap, distr);
+
         //save files from local mesh        
         PointType localOrigin=parametry.getOrigin(CommunicatorType::GetRank());        
         PointType localProportions=parametry.getProportions(CommunicatorType::GetRank());;
@@ -306,28 +318,14 @@ class TestDistributedGridIO{
         localMeshFunctionptr->bind(localGridptr,localDof);
         linearFunctionEvaluator.evaluateAllEntities(localMeshFunctionptr , linearFunctionPtr);
 
+
+        String FileName=String("/tmp/test-file.tnl");
         File file;
-        file.open( String( "/tmp/test-file.tnl-" )+convertToString(CommunicatorType::GetRank()), IOMode::write );        
+        file.open( FileName+String("-")+distrgrid.printProcessCoords(), IOMode::write );        
         localMeshFunctionptr->save(file);
         file.close();
 
-        //Crete distributed grid            
-        PointType globalOrigin;
-        globalOrigin.setValue(-0.5);
-
-        PointType globalProportions;
-        globalProportions.setValue(20);
-
-        MeshType globalGrid;
-        globalGrid.setDimensions(globalProportions);
-        globalGrid.setDomain(globalOrigin,globalProportions);
-        
-        int *distr=parametry.getDistr();
 
-        CoordinatesType overlap;
-        overlap.setValue(1);
-        DistributedGridType distrgrid;
-        distrgrid.template setGlobalGrid<CommunicatorType>(globalGrid,overlap, distr);
 
         //Crete "distributedgrid driven" grid filed by load
         SharedPointer<MeshType> loadGridptr;
@@ -338,12 +336,7 @@ class TestDistributedGridIO{
         loadDof.setValue(0);
         loadMeshFunctionptr->bind(loadGridptr,loadDof);
 
-            
-        file.open( String( "/tmp/test-file.tnl-" )+convertToString(CommunicatorType::GetRank()), IOMode::read );  
-  
-        DistributedGridIO<MeshFunctionType> ::load(file, *loadMeshFunctionptr );
-
-        file.close();
+        DistributedGridIO<MeshFunctionType> ::load(FileName, *loadMeshFunctionptr );
 
         loadMeshFunctionptr->template Synchronize<CommunicatorType>(); //need synchronization for overlaps to be filled corectly in loadDof
 
diff --git a/tests/mpi/CMakeLists.txt b/tests/mpi/CMakeLists.txt
index e42d94f65f9210b63291755bcb15431dd960c5ea..0be5708088fcf742181c0ef3c0061484883841e5 100644
--- a/tests/mpi/CMakeLists.txt
+++ b/tests/mpi/CMakeLists.txt
@@ -16,10 +16,16 @@
    	TARGET_COMPILE_DEFINITIONS( tnlMeshFuncttionEvaluateTestY PUBLIC "-DDIMENSION=2" )
    	TARGET_COMPILE_DEFINITIONS( tnlMeshFuncttionEvaluateTestY PUBLIC "-DYDISTR" )
 
+
     ADD_EXECUTABLE( mpiio-save-test ${headers} mpiio-save-test.cpp )   
-    TARGET_LINK_LIBRARIES( tnlMeshFuncttionEvaluateTestY ${CPPUNIT_LIBRARIES}
+    TARGET_LINK_LIBRARIES(  mpiio-save-test ${CPPUNIT_LIBRARIES}
+                                                           tnl )
+
+    ADD_EXECUTABLE( mpiio-save-load-test ${headers} mpiio-save-load-test.cpp )   
+    TARGET_LINK_LIBRARIES(  mpiio-save-load-test ${CPPUNIT_LIBRARIES}
                                                            tnl )
 
+
     #ADD_EXECUTABLE( tnlMeshFuncttionEvaluateTestZ ${headers} MeshFunctionEvauateTest.cpp )   
     #TARGET_LINK_LIBRARIES( tnlMeshFuncttionEvaluateTestZ ${CPPUNIT_LIBRARIES}
           #                                                  tnl${mpiExt}-0.1 )
@@ -40,4 +46,8 @@ IF( BUILD_CUDA )
     CUDA_ADD_EXECUTABLE( GPUmeshFunctionEvaluateTest ${headers} GPUmeshFuncitonEvaluateTest.cu )
     TARGET_LINK_LIBRARIES( GPUmeshFunctionEvaluateTest ${CPPUNIT_LIBRARIES}
                                            tnl )   
+
+    CUDA_ADD_EXECUTABLE( mpiio-save-test-gpu ${headers} mpiio-save-test.cu )   
+    TARGET_LINK_LIBRARIES(  mpiio-save-test-gpu ${CPPUNIT_LIBRARIES}
+                                                           tnl )
 ENDIF( BUILD_CUDA )    
diff --git a/tests/mpi/GPUmeshFuncitonEvaluateTest.cu b/tests/mpi/GPUmeshFuncitonEvaluateTest.cu
index c179304c878e940244c8320a92692e336f79ce4e..b421dc9c6300074a0dc390be89888341b79fbc5d 100644
--- a/tests/mpi/GPUmeshFuncitonEvaluateTest.cu
+++ b/tests/mpi/GPUmeshFuncitonEvaluateTest.cu
@@ -151,7 +151,7 @@ int main ( int argc, char *argv[])
     cout << sum <<endl<<endl;  
     
     cout<<"distr: ";
-    distrgrid.printdistr(cout);
+    cout << distrgrid.printProcessDistr().getString();
     cout << endl;
   
     cout<<"setup: "<<setup.getRealTime() <<endl;
diff --git a/tests/mpi/MeshFunctionEvauateTest.cpp b/tests/mpi/MeshFunctionEvauateTest.cpp
index 9298f8a51fe0c71e3dfec05c42dfb26cbf3ef567..8aa6fee8ab142f7e79041405fca4ce517038dacc 100644
--- a/tests/mpi/MeshFunctionEvauateTest.cpp
+++ b/tests/mpi/MeshFunctionEvauateTest.cpp
@@ -159,7 +159,7 @@ int main ( int argc, char *argv[])
     cout << sum <<endl<<endl;  
     
     cout<<"distr: ";
-    distrgrid.printdistr(cout);
+    cout << distrgrid.printProcessDistr().getString();
     cout << endl;
   
     cout<<"setup: "<<setup.getRealTime() <<endl;
diff --git a/tests/mpi/mpiio-save-load-test.cpp b/tests/mpi/mpiio-save-load-test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b669df776d6a945551942a5d64a82179f812a39d
--- /dev/null
+++ b/tests/mpi/mpiio-save-load-test.cpp
@@ -0,0 +1,94 @@
+#include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
+#include <TNL/Functions/MeshFunction.h>
+
+#define MPIIO
+#include <TNL/Meshes/DistributedMeshes/DistributedGridIO.h>
+
+
+#include "../../src/UnitTests/Mpi/Functions.h"
+
+#define DIM 1 
+
+using namespace TNL::Containers;
+using namespace TNL::Meshes;
+using namespace TNL::Functions;
+using namespace TNL::Devices;
+using namespace TNL::Communicators;
+using namespace TNL::Meshes::DistributedMeshes;
+
+
+int main(int argc, char **argv)
+{
+
+        typedef Host Device;
+        typedef MpiCommunicator CommunicatorType;
+
+        typedef Grid<DIM,double,Device,int> MeshType;
+        typedef MeshFunction<MeshType> MeshFunctionType;
+        typedef Vector<double,Device,int> DofType;
+        typedef typename MeshType::Cell Cell;
+        typedef typename MeshType::IndexType IndexType; 
+        typedef typename MeshType::PointType PointType; 
+        typedef DistributedMesh<MeshType> DistributedGridType;
+
+        typedef typename DistributedGridType::CoordinatesType CoordinatesType;
+        typedef LinearFunction<double,DIM> LinearFunctionType;
+
+        CommunicatorType::Init(argc, argv);
+
+        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
+                
+        //save distributed meshfunction into files
+        PointType globalOrigin;
+        globalOrigin.setValue(-0.5);
+
+        PointType globalProportions;
+        globalProportions.setValue(10);
+
+
+        MeshType globalGrid;
+        globalGrid.setDimensions(globalProportions);
+        globalGrid.setDomain(globalOrigin,globalProportions);
+        
+        File meshFile;
+        meshFile.open( String("globalGrid.tnl"),IOMode::write);
+        globalGrid.save( meshFile );
+        meshFile.close();
+
+        CoordinatesType overlap;
+        overlap.setValue(1);
+        DistributedGridType distrgrid;
+        distrgrid.template setGlobalGrid<CommunicatorType>(globalGrid,overlap);
+
+        SharedPointer<MeshType> gridptr;
+        SharedPointer<MeshFunctionType> meshFunctionptr;
+        distrgrid.SetupGrid(*gridptr);
+       
+        DofType dofsave(gridptr->template getEntitiesCount< Cell >());
+        dofsave.setValue(0);
+        meshFunctionptr->bind(gridptr,dofsave);
+            
+        linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
+        
+        String fileName=String("./meshFunction.tnl");
+        DistributedGridIO<MeshFunctionType,MpiIO> ::save(fileName, *meshFunctionptr );
+
+        DofType dofload(gridptr->template getEntitiesCount< Cell >());
+        dofload.setValue(0);
+        meshFunctionptr->bind(gridptr,dofload);
+
+        DistributedGridIO<MeshFunctionType,MpiIO> ::load(fileName, *meshFunctionptr );
+
+        for(int i=0;i<dofload.getSize();i++)
+        {
+            if(dofsave[i]!=dofload[i])
+                std::cout<<"Chyba na pozici "<< i << " dofsave: "<< dofsave[i] << " dofload: "<<dofload[i] <<std::endl;
+            else
+                std::cout <<"Ok!"<<std::endl;
+        }
+
+        CommunicatorType::Finalize();
+
+}
diff --git a/tests/mpi/mpiio-save-test.cpp b/tests/mpi/mpiio-save-test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c946a2d637e0e15a4d63a443a12ae055ddfcc6d7
--- /dev/null
+++ b/tests/mpi/mpiio-save-test.cpp
@@ -0,0 +1,3 @@
+#define DEVICE Host
+
+#include "mpiio-save-test.h"
diff --git a/tests/mpi/mpiio-save-test.cu b/tests/mpi/mpiio-save-test.cu
new file mode 100644
index 0000000000000000000000000000000000000000..24f98056dd37fe202f0e5809620d00a1ab323777
--- /dev/null
+++ b/tests/mpi/mpiio-save-test.cu
@@ -0,0 +1,3 @@
+#define DEVICE Cuda
+
+#include "mpiio-save-test.h"
diff --git a/tests/mpi/mpiio-save-test.h b/tests/mpi/mpiio-save-test.h
new file mode 100644
index 0000000000000000000000000000000000000000..0100398fc11e8eab479cbf2f368f6b6405b77633
--- /dev/null
+++ b/tests/mpi/mpiio-save-test.h
@@ -0,0 +1,83 @@
+#include <TNL/Communicators/MpiCommunicator.h>
+#include <TNL/Meshes/DistributedMeshes/DistributedMesh.h>
+#include <TNL/Functions/MeshFunction.h>
+
+#define MPIIO
+#include <TNL/Meshes/DistributedMeshes/DistributedGridIO.h>
+
+
+#include "../../src/UnitTests/Mpi/Functions.h"
+
+#define DIM 2
+
+using namespace TNL::Containers;
+using namespace TNL::Meshes;
+using namespace TNL::Functions;
+using namespace TNL::Devices;
+using namespace TNL::Communicators;
+using namespace TNL::Meshes::DistributedMeshes;
+
+
+int main(int argc, char **argv)
+{
+
+        typedef DEVICE Device;
+        typedef MpiCommunicator CommunicatorType;
+
+        typedef Grid<DIM,double,Device,int> MeshType;
+        typedef MeshFunction<MeshType> MeshFunctionType;
+        typedef Vector<double,Device,int> DofType;
+        typedef typename MeshType::Cell Cell;
+        typedef typename MeshType::IndexType IndexType; 
+        typedef typename MeshType::PointType PointType; 
+        typedef DistributedMesh<MeshType> DistributedGridType;
+
+        typedef typename DistributedGridType::CoordinatesType CoordinatesType;
+        typedef LinearFunction<double,DIM> LinearFunctionType;
+
+        CommunicatorType::Init(argc, argv);
+
+        SharedPointer< LinearFunctionType, Device > linearFunctionPtr;
+        MeshFunctionEvaluator< MeshFunctionType, LinearFunctionType > linearFunctionEvaluator;    
+                
+        //save distributed meshfunction into files
+        PointType globalOrigin;
+        globalOrigin.setValue(-0.5);
+
+        PointType globalProportions;
+        //globalProportions.setValue(10);
+        globalProportions.x()=10;   
+        globalProportions.y()=20;   
+        
+
+
+        MeshType globalGrid;
+        globalGrid.setDimensions(globalProportions);
+        globalGrid.setDomain(globalOrigin,globalProportions);
+        
+        File meshFile;
+        meshFile.open( String("globalGrid.tnl"),IOMode::write);
+        globalGrid.save( meshFile );
+        meshFile.close();
+
+        CoordinatesType overlap;
+        overlap.setValue(1);
+        DistributedGridType distrgrid;
+        distrgrid.template setGlobalGrid<CommunicatorType>(globalGrid,overlap);
+
+        SharedPointer<MeshType> gridptr;
+        SharedPointer<MeshFunctionType> meshFunctionptr;
+        distrgrid.SetupGrid(*gridptr);
+       
+        DofType dof(gridptr->template getEntitiesCount< Cell >());
+        dof.setValue(0);
+        meshFunctionptr->bind(gridptr,dof);
+            
+        linearFunctionEvaluator.evaluateAllEntities(meshFunctionptr , linearFunctionPtr);
+        
+        String fileName=String("./meshFunction.tnl");
+        DistributedGridIO<MeshFunctionType,MpiIO> ::save(fileName, *meshFunctionptr );
+
+        CommunicatorType::Finalize();
+
+}
diff --git a/tests/mpi/simple-mpiio.cu b/tests/mpi/simple-mpiio.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1b8de26039b14b230a1b3a5e2d6d43cbc629d062
--- /dev/null
+++ b/tests/mpi/simple-mpiio.cu
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <mpi.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv)
+{
+
+   int localsize = 30;
+   int overlap=1;
+   int globalsize = localsize+2*overlap;
+
+   double * data;
+   //data=(double *)malloc(globalsize*globalsize*sizeof(double));
+   cudaMalloc((void **)&data, globalsize*globalsize*sizeof(double));
+
+   MPI_Init(&argc,&argv);
+
+   int rank;
+   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+   int size;
+   MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+   printf("rank: %d, size %d\n",rank, size);
+
+   int fgsize[2],flsize[2],fstarts[2];
+   fgsize[0]=size*localsize;
+   fgsize[1]=localsize;
+   flsize[0]=localsize;
+   flsize[1]=localsize;
+   fstarts[0]=rank*localsize;
+   fstarts[1]=0;
+
+   MPI_Datatype ftype;
+
+   MPI_Type_create_subarray(2,
+        fgsize,flsize,fstarts,
+        MPI_ORDER_C,MPI_DOUBLE,&ftype);
+
+   MPI_Type_commit(&ftype);
+
+
+   int agsize[2],alsize[2],astarts[2];
+   agsize[0]=globalsize;
+   agsize[1]=globalsize;
+   alsize[0]=localsize;
+   alsize[1]=localsize;
+   astarts[0]=overlap;
+   astarts[1]=overlap;
+
+   MPI_Datatype atype;
+
+   MPI_Type_create_subarray(2,
+        agsize,alsize,astarts,
+        MPI_ORDER_C,MPI_DOUBLE,&atype);
+
+   MPI_Type_commit(&atype);
+
+
+   MPI_File file;
+   MPI_File_open(MPI_COMM_WORLD,"./pokus.file",
+	MPI_MODE_CREATE|MPI_MODE_WRONLY,
+        MPI_INFO_NULL, &file);
+
+   MPI_File_set_view(file,0,MPI_DOUBLE,ftype,"native",MPI_INFO_NULL);
+
+   MPI_Status wstatus;
+   MPI_File_write(file,data,1,atype,&wstatus);
+
+   MPI_File_close(&file);
+
+   MPI_Finalize();
+
+   cudaFree(data);
+
+return 0;
+}
+