Commit f6e34a44 authored by Matouš Fencl's avatar Matouš Fencl Committed by Jakub Klinkovský
Browse files

GEM can now calculate with multiple grid (matrix size 8000 rows +), better...

GEM can now calculate with multiple grids (matrices with 8000+ rows); better matrix reading and memory operations implemented. Text almost done.
parent 831c3f25
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -6,8 +6,8 @@ TNL_HEADERS = ${HOME}/.local/include
INSTALL_DIR = ${HOME}/.local
WITH_CUDA = yes
WITH_MPI = yes
WITH_OPENMP = yes
WITH_DEBUG = yes
WITH_OPENMP = no
WITH_DEBUG = no
MPI_FLAGT =
MPI_FLAGS =
#MPI_FLAGT =-L/home/maty/.openmpi/lib -lmpi
+8 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ class Matrix
    typedef Real RealType;
    typedef TNL::Containers::Vector< Index, Device, Index > CompressedRowLengthsVector;
    typedef TNL::Containers::Vector< Real, Device, Index > Vector;
    typedef TNL::Containers::Vector< Real, TNL::Devices::Host, Index > VectorHost;
    typedef TNL::Containers::VectorView< Real, Device, Index > VectorView;
    typedef TNL::Containers::VectorView< Real, TNL::Devices::Host, Index > HostVectorView;
    typedef TNL::Containers::VectorView< Real, TNL::Devices::Cuda, Index > DeviceVectorView;
@@ -28,6 +29,13 @@ class Matrix
    Matrix( const Index rows, const Index columns );
    Matrix( Matrix< Real, Device, Index>& matrix );
    
    /**
    * Resets the matrix to a minimal 1 x 1 state: the dimensions are set to
    * one row and one column, and the data array is reallocated to hold a
    * single element.
    */
    void reset();
    
    /**
    * Sets dimension for matrix of rows x columns elements and allocates memory
    * for data array of this matrix.
+37 −7
Original line number Diff line number Diff line
@@ -25,6 +25,18 @@ Matrix< Real, Device, Index >::Matrix( Matrix< Real, Device, Index>& matrix )
  matrix.getData( this->data );
}

template < typename Real,
        typename Device,
        typename Index >
void Matrix< Real, Device, Index >::reset( )
{
  // Drop the current storage first, then shrink to the smallest valid
  // allocation. NOTE(review): this leaves a 1 x 1 matrix with one data
  // element, not an empty (0 x 0) one — confirm against the header docs.
  this->data.reset();
  this->data.setSize( 1 );
  this->columns = 1;
  this->rows = 1;
}



template < typename Real,
        typename Device,
@@ -40,8 +52,8 @@ void Matrix< Real, Device, Index >::setDimensions( const Index rows, const Index
#if DEBUG
    printf("We are making host array!\n");
#endif
    data.setSize( rows*columns );
    data.setValue( 0 );
    this->data.setSize( rows*columns );
    this->data.setValue( 0 );
  }
#ifdef HAVE_CUDA
  if( std::is_same< Device, TNL::Devices::Cuda >::value )
@@ -321,27 +333,45 @@ Matrix<Real, Device, Index >&
Matrix< Real, Device, Index >::operator=( Matrix< Real, Device2, Index>& matrix )
{
  this->setDimensions( matrix.getNumRows(), matrix.getNumColumns() );
  Vector pom;
  matrix.getData( pom );
  if( std::is_same< Device, Device2 >::value )
  {
    Vector pom;
    matrix.getData( pom );
    this->data = pom; 
  }
#ifdef HAVE_CUDA
  else if( std::is_same< Device, TNL::Devices::Host >::value )
  {
    VectorHost pom;
    matrix.getData( pom );
    
    VectorHost pom1;
    pom1.setSize( this->data.getSize() );

#ifdef HAVE_OPENMP
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
#endif
    for( int i = 0; i < this->rows; i++ )
      for( int j = 0; j < this->columns; j++ )
        this->setElement(i,j, pom[ i*TNL::roundToMultiple( this->rows, TNL::Cuda::getWarpSize() ) + j ] );
        pom1[  i * this->columns + j ] =  pom[ i*TNL::roundToMultiple( this->rows, TNL::Cuda::getWarpSize() ) + j ];
    this->data = pom1;
  } 
  else if( std::is_same< Device, TNL::Devices::Cuda >::value )
  {
    VectorHost pom;
    matrix.getData( pom );
    
    VectorHost pom1;
    pom1.setSize( this->data.getSize() );
#ifdef HAVE_OPENMP
#pragma omp parallel for if( Devices::Host::isOMPEnabled() )
#endif
    for( int i = 0; i < this->getNumRows(); i++ )
      for( int j = 0; j < this->getNumColumns(); j++ )
      {
        this->data.setElement( i*TNL::roundToMultiple( this->columns, TNL::Cuda::getWarpSize() ) + j,
                pom.getElement( i * this->columns + j ) );
        pom1[ i*TNL::roundToMultiple( this->columns, TNL::Cuda::getWarpSize() ) + j ] =  pom[ i * this->columns + j];
      }
    this->data = pom1;
  }
#endif // HAVE_CUDA
  return *this;
+50 −46
Original line number Diff line number Diff line
@@ -3,18 +3,18 @@

#include "Matrix/Matrix.h"
#include "gem/GEM.h"
#include "TNL/tnl-dev/src/TNL/Math.h"
#include <TNL/Math.h>
#include <typeinfo> // type printf
#include <TNL/Matrices/MatrixReader.h>

#include "TNL/tnl-dev/src/TNL/Cuda/CheckDevice.h"
#include "TNL/tnl-dev/src/TNL/Devices/Cuda.h"
#include "TNL/tnl-dev/src/TNL/Containers/Vector.h"
#include "TNL/tnl-dev/src/TNL/Cuda/MemoryHelpers.h"
#include "TNL/tnl-dev/src/TNL/Devices/Host.h"
#include <TNL/Cuda/CheckDevice.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Containers/Vector.h>
#include <TNL/Cuda/MemoryHelpers.h>
#include <TNL/Devices/Host.h>

#ifdef HAVE_MPI
#include "TNL/tnl-dev/src/TNL/Communicators/MpiCommunicator.h"
#include <TNL/Communicators/MpiCommunicator.h>
#include <mpi.h>
#endif

@@ -40,9 +40,9 @@ template < typename Real, typename Index >
void readVector( Vector< Real, Devices::Host, Index >& vector_host, const String& vectorName );


template < typename Real, typename Index, typename Device >
void readMatrixVector( Matrix< Real, Device, Index>& matrix, 
        Vector< Real, Device, Index >& vector,
template < typename Real, typename Index >
void readMatrixVector( Matrix< Real, Devices::Host, Index>& matrix, 
        Vector< Real, Devices::Host, Index >& vector,
        const String& matrixName,  const String& vectorName, 
        Index &rows, Index &nonzeros, const int verbose );

@@ -55,17 +55,22 @@ Vector< Real, Device, Index > runGEM( const String& matrixName, const String& ve
{  
  typedef Matrix< Real, Device, Index > MatrixType;
  typedef Vector< Real, Device, Index > VectorType;
  typedef Matrix< Real, Devices::Host, Index > MatrixTypeHost;
  typedef Vector< Real, Devices::Host, Index > VectorTypeHost;
  
  int processID = 0; // MPI processID, without mpi == 0
#ifdef HAVE_MPI
  MPI_Comm_rank( MPI_COMM_WORLD, &processID );
  Communicators::MpiCommunicator::Barrier( MPI_COMM_WORLD );
#endif
  
  MatrixTypeHost matrixHost;
  VectorTypeHost vectorHost;
  MatrixType matrix;
  VectorType vector;
  VectorType vectorResult;
  
  Index rows, nonzeros;
  readMatrixVector( matrix, vector, matrixName, vectorName, rows, nonzeros, verbose );
  VectorType vectorResult( rows );
  readMatrixVector( matrixHost, vectorHost, matrixName, vectorName, rows, nonzeros, verbose );
    
  // Computation
  double* time;
@@ -80,10 +85,14 @@ Vector< Real, Device, Index > runGEM( const String& matrixName, const String& ve
#ifdef HAVE_MPI
    Communicators::MpiCommunicator::Barrier( MPI_COMM_WORLD );
#endif 
    MatrixType matrixComp = matrix;
    VectorType vectorComp( vector );
    //readMatrixVector( matrixHost, vectorHost, matrixName, vectorName, rows, nonzeros, verbose );
    matrix.reset();
    vector.reset();
    matrix = matrixHost;
    vector = vectorHost;
    vectorResult.setSize( rows );
    vectorResult.setValue( 0 );
    GEM< Real, Device, Index > gem( matrixComp, vectorComp );
    GEM< Real, Device, Index > gem( matrix, vector );
    
#ifdef HAVE_MPI
    Communicators::MpiCommunicator::Barrier( MPI_COMM_WORLD );
@@ -125,15 +134,16 @@ Vector< Real, Device, Index > runGEM( const String& matrixName, const String& ve
  timeMean /= loops;
  
  if( processID == 0 )
    printf("%20s %15d %15d %20s %15s %15s %10d %15.5f %15.5f\n", matrixName.c_str(),
            rows, nonzeros, vectorName == "none" ? "-":vectorName.c_str(),
            device.c_str(), typeid(Real).name() == (string)"f" ? "float":"double", loops, timeMean, error);
    printf("%20s %15s %15s %10d %20s & %15d & %15.3f & %15.3f\n", vectorName == "none" ? "-":vectorName.c_str(),
            device.c_str(), typeid(Real).name() == (string)"f" ? "float":"double", loops, matrixName.c_str(),
            rows, timeMean, error);
  
  delete []time;
  
#ifdef HAVE_MPI
  Communicators::MpiCommunicator::Barrier( MPI_COMM_WORLD );
#endif 
  matrix.reset();
  if( processID == 0 ){
    //printf("%d: returning\n", processID );
    return vectorResult;
@@ -146,47 +156,39 @@ Vector< Real, Device, Index > runGEM( const String& matrixName, const String& ve
  }
}

template < typename Real, typename Index, typename Device >
void readMatrixVector( Matrix< Real, Device, Index>& matrix, 
        Vector< Real, Device, Index >& vector,
template < typename Real, typename Index >
void readMatrixVector( Matrix< Real, Devices::Host, Index>& matrix, 
        Vector< Real, Devices::Host, Index >& vector,
        const String& matrixName,  const String& vectorName, 
        Index &rows, Index &nonzeros, const int verbose )
{
  typedef Matrix< Real, Devices::Host, Index> MatrixHost;
  MatrixHost matrixHost;
  if( verbose > 1 )
    cout << "reading matrix " << matrixName << endl;
  // Get matrix
  Matrices::MatrixReader< MatrixHost > m;
  m.readMtxFile( "./test-matrices/" + matrixName, matrixHost, verbose );
  rows = matrixHost.getNumRows();
  nonzeros = matrixHost.getNumNonzeros();
  
  Matrices::MatrixReader< Matrix< Real, Devices::Host, Index> > m;
  m.readMtxFile( "./test-matrices/" + matrixName, matrix, verbose );
  rows = matrix.getNumRows();
  nonzeros = matrix.getNumNonzeros();
  vector.setSize( rows );
  
  // Get vector
  Vector< Real, Devices::Host, Index > vectorHost( matrixHost.getNumRows() );
  
  if( vectorName == "none" )
    calculHostVecOne( matrixHost, vectorHost, vectorName );
  else
  if( vectorName == "none" ){
    calculHostVecOne( matrix, vector, vectorName );
  }else
  {
    readVector( vectorHost, vectorName );
    readVector( vector, vectorName );
    if( verbose > 1 )
      cout << "reading vector " << vectorName << endl;
  }
  
#ifdef HAVE_MPI
  cutMatrixVectorMPI( matrixHost, vectorHost );
  cutMatrixVectorMPI( matrix, vector );
#endif
  if( verbose > 2 )
    matrixHost.showMatrix();
    matrix.showMatrix();
  
  if( verbose > 2 )
    cout << vectorHost << endl;
  
  // Copy from CPU into matrix dependent on template Device
  vector = vectorHost;
  matrix = matrixHost;
    cout << vector << endl;
}

template < typename Real, typename Index >
@@ -200,7 +202,7 @@ void calculHostVecOne( Matrix< Real, Devices::Host, Index >& matrix,
    {
      pom += matrix.getElement(i,j);
    }
    vector[i] = pom;
    vector.setElement( i, pom );
  }
  
  /*ofstream outdata; // outdata is like cin
@@ -275,6 +277,8 @@ void cutMatrixVectorMPI( Matrix< Real, Devices::Host, Index >& matrix,
        matrixTemp.setElement(i,j,0);
      }
    }
    //if( j % 1000 == 0 )
    //  printf("%d: cutting col %d\n", processID, j );
  }
  for( int i = 0; i < numRowsCUT; i++ ){
    if( i + numRowsCUT * processID < matrix.getNumRows() ){
@@ -283,7 +287,7 @@ void cutMatrixVectorMPI( Matrix< Real, Devices::Host, Index >& matrix,
      vectorTemp.setElement( i, 0 );
    }
  }
  
  matrix.reset(); vector.reset();
  matrix = matrixTemp;
  vector = vectorTemp;
}
+1 −1
Original line number Diff line number Diff line
#include "TNL/Cuda/MemoryHelpers.h"
#include <TNL/Cuda/MemoryHelpers.h>
#define DEBUG 0


Loading