Commit e7880461 authored by Jakub Klinkovský's avatar Jakub Klinkovský

Removed MIC support

parent ccd42739
......@@ -21,7 +21,6 @@ stages:
WITH_OPENMP: "no"
WITH_CUDA: "no"
WITH_CUDA_ARCH: "auto"
WITH_MIC: "no"
WITH_MPI: "no"
# configurations
WITH_TESTS: "no"
......@@ -56,7 +55,6 @@ stages:
-DWITH_MPI=${WITH_MPI}
-DWITH_CUDA=${WITH_CUDA}
-DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-DWITH_MIC=${WITH_MIC}
-DWITH_TESTS=${WITH_TESTS}
-DWITH_DOC=${WITH_DOC}
-DWITH_COVERAGE=${WITH_COVERAGE}
......
......@@ -17,7 +17,6 @@ set( tnlVersion "0.1" )
# declare all custom build options
option(OFFLINE_BUILD "Offline build (i.e. without downloading libraries such as pybind11)" OFF)
option(WITH_MIC "Build with MIC support" OFF)
option(WITH_CUDA "Build with CUDA support" ON)
set(WITH_CUDA_ARCH "auto" CACHE STRING "Build for these CUDA architectures")
option(WITH_OPENMP "Build with OpenMP support" ON)
......@@ -120,22 +119,6 @@ if( NOT DEFINED ENV{CI_JOB_NAME} )
endif()
endif()
if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_ICPC -wd2568 -wd2571 -wd2570")
#####
# Check for MIC
#
if( ${WITH_MIC} )
message( "Enabled MIC support." )
set( MIC_CXX_FLAGS "-DHAVE_MIC")
# build all tests with MIC support
set( CXX_TESTS_FLAGS ${CXX_TESTS_FLAGS} -DHAVE_MIC )
set( WITH_CUDA OFF CACHE BOOL "Build with CUDA support" )
else()
set( MIC_CXX_FLAGS "")
endif()
endif()
# force colorized output in continuous integration
if( DEFINED ENV{CI_JOB_NAME} OR ${CMAKE_GENERATOR} STREQUAL "Ninja" )
message(STATUS "Continuous integration or Ninja detected -- forcing compilers to produce colorized output.")
......@@ -355,7 +338,6 @@ INCLUDE( CPack )
# Print custom build options
message( "-- Build options:" )
message( " OFFLINE_BUILD = ${OFFLINE_BUILD}" )
message( " WITH_MIC = ${WITH_MIC}" )
message( " WITH_CUDA = ${WITH_CUDA}" )
message( " WITH_CUDA_ARCH = ${WITH_CUDA_ARCH}" )
message( " WITH_OPENMP = ${WITH_OPENMP}" )
......
......@@ -48,7 +48,6 @@ do
--offline-build ) OFFLINE_BUILD="yes" ;;
--with-clang=* ) WITH_CLANG="${option#*=}" ;;
--with-mpi=* ) WITH_MPI="${option#*=}" ;;
--with-mic=* ) WITH_MIC="${option#*=}" ;;
--with-cuda=* ) WITH_CUDA="${option#*=}" ;;
--with-cuda-arch=* ) WITH_CUDA_ARCH="${option#*=}";;
--with-openmp=* ) WITH_OPENMP="${option#*=}" ;;
......@@ -78,7 +77,6 @@ if [[ ${HELP} == "yes" ]]; then
echo " --install=yes/no Enables the installation of TNL files."
echo " --offline-build=yes/no Disables online updates during the build. 'no' by default."
echo " --with-mpi=yes/no Enables MPI. 'yes' by default (OpenMPI required)."
echo " --with-mic=yes/no Enables MIC (Intel Xeon Phi). 'no' by default (Intel Compiler required)."
echo " --with-cuda=yes/no Enables CUDA. 'yes' by default (CUDA Toolkit is required)."
echo " --with-cuda-arch=all/auto/3.0/3.5/... Chooses CUDA architecture. 'auto' by default."
echo " --with-openmp=yes/no Enables OpenMP. 'yes' by default."
......@@ -126,7 +124,6 @@ cmake_command=(
-DCMAKE_BUILD_TYPE=${BUILD}
-DCMAKE_INSTALL_PREFIX=${PREFIX}
-DOFFLINE_BUILD=${OFFLINE_BUILD}
-DWITH_MIC=${WITH_MIC}
-DWITH_CUDA=${WITH_CUDA}
-DWITH_CUDA_ARCH=${WITH_CUDA_ARCH}
-DWITH_OPENMP=${WITH_OPENMP}
......
......@@ -8,8 +8,6 @@
/* See Copyright Notice in tnl/Copyright */
#include <TNL/Devices/MIC.h>
#pragma once
#include "GridTraverser.h"
......
......@@ -10,7 +10,6 @@
#pragma once
#include <TNL/Devices/MIC.h>
#include <TNL/Communicators/MpiCommunicator.h>
#include <TNL/Communicators/NoDistrCommunicator.h>
#include "ComputeBlockResidue.h"
......@@ -209,28 +208,7 @@ void Euler< Problem, SolverMonitor >::computeNewTimeLevel( DofVectorPointer& u,
}
#endif
}
//MIC
if( std::is_same< DeviceType, Devices::MIC >::value )
{
#ifdef HAVE_MIC
Devices::MICHider<RealType> mu;
mu.pointer=_u;
Devices::MICHider<RealType> mk1;
mk1.pointer=_k1;
#pragma offload target(mic) in(mu,mk1,size) inout(localResidue)
{
#pragma omp parallel for reduction(+:localResidue) firstprivate( mu, mk1 )
for( IndexType i = 0; i < size; i ++ )
{
const RealType add = tau * mk1.pointer[ i ];
mu.pointer[ i ] += add;
localResidue += std::fabs( add );
}
}
#endif
}
localResidue /= tau * ( RealType ) size;
Problem::CommunicatorType::Allreduce( &localResidue, &currentResidue, 1, MPI_SUM, Problem::CommunicatorType::AllGroup );
//std::cerr << "Local residue = " << localResidue << " - globalResidue = " << currentResidue << std::endl;
......
set( tnl_heat_equation_SOURCES
set( tnl_heat_equation_SOURCES
tnl-heat-equation.cpp
tnl-heat-equation-eoc.cpp
tnl-heat-equation.cu
tnl-heat-equation-eoc.cu )
IF( BUILD_CUDA )
CUDA_ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cu)
CUDA_ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cu)
target_link_libraries (tnl-heat-equation ${CUSPARSE_LIBRARY} )
target_link_libraries (tnl-heat-equation-eoc-test ${CUSPARSE_LIBRARY} )
ELSE( BUILD_CUDA )
ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp)
ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp)
TARGET_COMPILE_DEFINITIONS( tnl-heat-equation PUBLIC ${MIC_CXX_FLAGS} )
ELSE( BUILD_CUDA )
ADD_EXECUTABLE(tnl-heat-equation tnl-heat-equation.cpp)
ADD_EXECUTABLE(tnl-heat-equation-eoc-test tnl-heat-equation-eoc.cpp)
ENDIF( BUILD_CUDA )
......@@ -20,7 +19,7 @@ INSTALL( TARGETS tnl-heat-equation
tnl-heat-equation-eoc-test
RUNTIME DESTINATION bin
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
INSTALL( FILES tnl-run-heat-equation-eoc-test
tnl-run-heat-equation
${tnl_heat_equation_SOURCES}
......
......@@ -14,10 +14,8 @@
#include <TNL/Allocators/Host.h>
#include <TNL/Allocators/Cuda.h>
#include <TNL/Allocators/MIC.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/MIC.h>
namespace TNL {
namespace Allocators {
......@@ -45,13 +43,5 @@ struct Default< Devices::Cuda >
using Allocator = Allocators::Cuda< T >;
};
//! Sets \ref Allocators::MIC as the default allocator for \ref Devices::MIC.
template<>
struct Default< Devices::MIC >
{
template< typename T >
using Allocator = Allocators::MIC< T >;
};
} // namespace Allocators
} // namespace TNL
/***************************************************************************
MIC.h - description
-------------------
begin : Jul 2, 2019
copyright : (C) 2019 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include <TNL/Devices/MIC.h>
#include <TNL/Exceptions/MICSupportMissing.h>
namespace TNL {
namespace Allocators {
/**
* \brief Allocator for the MIC device memory space.
*/
template< class T >
struct MIC
{
using value_type = T;
using size_type = std::size_t;
using difference_type = std::ptrdiff_t;
MIC() = default;
MIC( const MIC& ) = default;
MIC( MIC&& ) = default;
MIC& operator=( const MIC& ) = default;
MIC& operator=( MIC&& ) = default;
template< class U >
MIC( const MIC< U >& )
{}
template< class U >
MIC( MIC< U >&& )
{}
template< class U >
MIC& operator=( const MIC< U >& )
{
return *this;
}
template< class U >
MIC& operator=( MIC< U >&& )
{
return *this;
}
value_type* allocate( size_type size )
{
#ifdef HAVE_MIC
Devices::MICHider<void> hide_ptr;
#pragma offload target(mic) out(hide_ptr) in(size)
{
hide_ptr.pointer = malloc(size * sizeof(value_type));
}
return hide_ptr.pointer;
#else
throw Exceptions::MICSupportMissing();
#endif
}
void deallocate(value_type* ptr, size_type)
{
#ifdef HAVE_MIC
Devices::MICHider<void> hide_ptr;
hide_ptr.pointer=ptr;
#pragma offload target(mic) in(hide_ptr)
{
free(hide_ptr.pointer);
}
#else
throw Exceptions::MICSupportMissing();
#endif
}
};
template<class T1, class T2>
bool operator==(const MIC<T1>&, const MIC<T2>&)
{
return true;
}
template<class T1, class T2>
bool operator!=(const MIC<T1>& lhs, const MIC<T2>& rhs)
{
return !(lhs == rhs);
}
} // namespace Allocators
} // namespace TNL
......@@ -38,7 +38,7 @@
#define TNL_NVCC_HD_WARNING_DISABLE
#endif
#if defined(NDEBUG) || defined(HAVE_MIC)
#ifdef NDEBUG
// empty macros for optimized build
/**
......
......@@ -12,7 +12,6 @@
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/MIC.h>
namespace TNL {
namespace Containers {
......@@ -194,95 +193,6 @@ struct ArrayOperations< Devices::Host, Devices::Cuda >
const Index size );
};
template<>
struct ArrayOperations< Devices::MIC >
{
template< typename Element >
static void setElement( Element* data,
const Element& value );
template< typename Element >
static Element getElement( const Element* data );
template< typename Element, typename Index >
static void set( Element* data,
const Element& value,
const Index size );
template< typename DestinationElement,
typename SourceElement,
typename Index >
static void copy( DestinationElement* destination,
const SourceElement* source,
const Index size );
template< typename DestinationElement,
typename Index,
typename SourceIterator >
static void copyFromIterator( DestinationElement* destination,
Index destinationSize,
SourceIterator first,
SourceIterator last );
template< typename Element1,
typename Element2,
typename Index >
static bool compare( const Element1* destination,
const Element2* source,
const Index size );
template< typename Element,
typename Index >
static bool containsValue( const Element* data,
const Index size,
const Element& value );
template< typename Element,
typename Index >
static bool containsOnlyValue( const Element* data,
const Index size,
const Element& value );
};
template<>
struct ArrayOperations< Devices::MIC, Devices::Host >
{
public:
template< typename DestinationElement,
typename SourceElement,
typename Index >
static void copy( DestinationElement* destination,
const SourceElement* source,
const Index size );
template< typename DestinationElement,
typename SourceElement,
typename Index >
static bool compare( const DestinationElement* destination,
const SourceElement* source,
const Index size );
};
template<>
struct ArrayOperations< Devices::Host, Devices::MIC >
{
template< typename DestinationElement,
typename SourceElement,
typename Index >
static void copy( DestinationElement* destination,
const SourceElement* source,
const Index size );
template< typename DestinationElement,
typename SourceElement,
typename Index >
static bool compare( const DestinationElement* destination,
const SourceElement* source,
const Index size );
};
} // namespace Algorithms
} // namespace Containers
} // namespace TNL
......@@ -290,4 +200,3 @@ struct ArrayOperations< Devices::Host, Devices::MIC >
#include <TNL/Containers/Algorithms/ArrayOperationsStatic.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsHost.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsCuda.hpp>
#include <TNL/Containers/Algorithms/ArrayOperationsMIC.hpp>
This diff is collapsed.
......@@ -20,11 +20,9 @@
* This macro serves for definition of function which are supposed to be called
* even from device. If HAVE_CUDA is defined, the __cuda_callable__ function
* is compiled for both CPU and GPU. If HAVE_CUDA is not defined, this macro has
* no effect. Support for Intel Xeon Phi is now in "hibernated" state.
* no effect.
*/
#ifdef HAVE_MIC
#define __cuda_callable__ __attribute__((target(mic)))
#elif HAVE_CUDA
#ifdef HAVE_CUDA
#define __cuda_callable__ __device__ __host__
#else
#define __cuda_callable__
......
/***************************************************************************
MIC.h - description
-------------------
begin : Nov 7, 2016
copyright : (C) 2016 by Tomas Oberhuber
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Vit Hanousek
#pragma once
#include <iostream>
#include <cstring>
#include <unistd.h>
#include <TNL/String.h>
#include <TNL/Assert.h>
#include <TNL/Pointers/SmartPointersRegister.h>
#include <TNL/Timer.h>
#include <TNL/Devices/CudaCallable.h>
namespace TNL {
namespace Devices {
namespace {
//useful macros from Intel's tutorials -- but we do not use it, becaouse it is tricky (system of maping variables CPU-MIC)
#define ALLOC alloc_if(1) //alloac variable at begining of offloaded block -- default
#define FREE free_if(1) // delete variable at the end of offloaded block -- default
#define RETAIN free_if(0) //do not delete variable at the end of offladed block
#define REUSE alloc_if(0) //do not alloc variable at begin of offloaded block, reuse variable on MIC which was not deleted befeore
//structure which hides pointer - bypass mapping of variables and addresses of arrays and allow get RAW addres of MIC memory to RAM
template< typename Type >
struct MICHider{
Type *pointer;
};
//inflatable structure -- structures can be copied to MIC - classes not (viz paper published after CSJP 2016 in Krakow)
//object can be copied in side this structure and then copied into MIC memory
template <unsigned int VELIKOST>
struct MICStruct{
uint8_t data[VELIKOST];
};
//Macros which can make code better readeble --but they are tricky, creating variables with specific names...
//version using inflatable structure
#define TNLMICSTRUCT(bb,typ) Devices::MICStruct<sizeof(typ)> s ## bb; \
memcpy((void*)& s ## bb,(void*)& bb,sizeof(typ));
#define TNLMICSTRUCTOFF(bb,typ) s ## bb
#define TNLMICSTRUCTUSE(bb,typ) typ * kernel ## bb = (typ*) &s ## bb;
#define TNLMICSTRUCTALLOC(bb,typ) typ * kernel ## bb = (typ*) malloc (sizeof(typ)); \
memcpy((void*)kernel ## bb,(void*) & s ## bb, sizeof(typ));
//version which retypes pointer of object to pointer to array of uint8_t,
//object can be copied using uint8_t pointer as array with same length as object size
#define TNLMICHIDE(bb,typ) uint8_t * u ## bb=(uint8_t *)&bb; \
MICHider<typ> kernel ## bb;
#define TNLMICHIDEALLOCOFF(bb,typ) in(u ## bb:length(sizeof(typ))) out(kernel ## bb)
#define TNLMICHIDEALLOC(bb,typ) kernel ## bb.pointer=(typ*)malloc(sizeof(typ)); \
memcpy((void*)kernel ## bb.pointer,(void*)u ## bb,sizeof(typ));
#define TNLMICHIDEFREEOFF(bb,typ) in(kernel ## bb)
#define TNLMICHIDEFREE(bb,typ) free((void*)kernel ## bb.pointer
class MIC
{
public:
static String getDeviceType()
{
return String( "Devices::MIC" );
};
// TODO: Remove getDeviceType();
static inline String getType() { return getDeviceType(); };
#ifdef HAVE_MIC
//useful debuging -- but produce warning
__cuda_callable__ static inline void CheckMIC(void)
{
#ifdef __MIC__
std::cout<<"ON MIC"<<std::endl;
#else
std::cout<<"ON CPU" <<std::endl;
#endif
};
//old copying funciton -- deprecated
template <typename TYP>
static
TYP * passToDevice(TYP &objektCPU)
{
uint8_t * uk=(uint8_t *)&objektCPU;
MICHider<TYP> ret;
#pragma offload target(mic) in(uk:length(sizeof(TYP))) out(ret)
{
ret.pointer=(TYP*)malloc(sizeof(TYP));
std::memcpy((void*)ret.pointer,(void*)uk,sizeof(TYP));
}
return ret.pointer;
std::cout << "Někdo mně volá :-D" <<std::endl;
};
//old cleaning function -- deprecated
template <typename TYP>
static
void freeFromDevice(TYP *objektMIC)
{
MICHider<TYP> ptr;
ptr.pointer=objektMIC;
#pragma offload target(mic) in(ptr)
{
free((void*)ptr.pointer);
}
};
static inline
void CopyToMIC(void* mic_ptr,void* ptr,size_t size)
{
uint8_t image[size];
std::memcpy((void*)&image,ptr,size);
Devices::MICHider<void> hide_ptr;
hide_ptr.pointer=mic_ptr;
#pragma offload target(mic) in(hide_ptr) in(image) in(size)
{
std::memcpy((void*)hide_ptr.pointer,(void*)&image,size);
}
};
#endif
static void insertSmartPointer( Pointers::SmartPointer* pointer )
{
smartPointersRegister.insert( pointer, -1 );
}
static void removeSmartPointer( Pointers::SmartPointer* pointer )
{
smartPointersRegister.remove( pointer, -1 );
}
// Negative deviceId means that CudaDeviceInfo::getActiveDevice will be
// called to get the device ID.
static bool synchronizeDevice( int deviceId = -1 )
{
smartPointersSynchronizationTimer.start();
bool b = smartPointersRegister.synchronizeDevice( deviceId );
smartPointersSynchronizationTimer.stop();
return b;
}
static Timer smartPointersSynchronizationTimer;
protected:
static Pointers::SmartPointersRegister smartPointersRegister;
};
Pointers::SmartPointersRegister MIC::smartPointersRegister;
Timer MIC::smartPointersSynchronizationTimer;
} // namespace <unnamed>
} // namespace Devices
} // namespace TNL
/***************************************************************************
MICBadAlloc.h - description
-------------------
begin : Jul 31, 2017
copyright : (C) 2017 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include <new>
namespace TNL {
namespace Exceptions {
struct MICBadAlloc
: public std::bad_alloc
{
const char* what() const throw()
{
return "Failed to allocate memory on the MIC device: "
"most likely there is not enough space on the device memory.";
}
};
} // namespace Exceptions
} // namespace TNL
/***************************************************************************
MICSupportMissing.h - description
-------------------
begin : Jul 31, 2017
copyright : (C) 2017 by Tomas Oberhuber et al.
email : tomas.oberhuber@fjfi.cvut.cz
***************************************************************************/
/* See Copyright Notice in tnl/Copyright */
// Implemented by: Jakub Klinkovsky
#pragma once
#include <stdexcept>
namespace TNL {
namespace Exceptions {
struct MICSupportMissing
: public std::runtime_error
{
MICSupportMissing()
: std::runtime_error( "MIC support is missing, but the program called a function which needs it. "
"Please recompile the program with MIC support." )
{}
};
} // namespace Exceptions
} // namespace TNL
......@@ -16,7 +16,6 @@
#include <TNL/String.h>
#include <TNL/Devices/Host.h>
#include <TNL/Devices/Cuda.h>
#include <TNL/Devices/MIC.h>
namespace TNL {
......@@ -154,14 +153,6 @@ class File
typename = void >
void load_impl( Type* buffer, std::streamsize elements );
template< typename Type,
typename SourceType,
typename Device,
typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
typename = void,
typename = void >
void load_impl( Type* buffer, std::streamsize elements );
template< typename Type,
typename TargetType,
typename Device,
......@@ -175,14 +166,6 @@ class File
typename = void >
void save_impl( const Type* buffer, std::streamsize elements );
template< typename Type,
typename TargetType,
typename Device,
typename = typename std::enable_if< std::is_same< Device, Devices::MIC >::value >::type,
typename = void,
typename = void >
void save_impl( const Type* buffer, std::streamsize elements );
std::fstream file;
String fileName;
......
......@@ -18,7 +18,6 @@
#include <TNL/File.h>
#include <TNL/Assert.h>
#include <TNL/Exceptions/CudaSupportMissing.h>
#include <TNL/Exceptions/MICSupportMissing.h>
#include <TNL/Exceptions/FileSerializationError.h>
#include <TNL/Exceptions/FileDeserializationError.h>
#include <TNL/Exceptions/NotImplementedError.h>
......@@ -168,48 +167,6 @@ void File::load_impl( Type* buffer, std::streamsize elements )
#endif
}
// MIC
template< typename Type,
typename SourceType,
typename Device,
typename, typename, typename >
void File::load_impl( Type* buffer, std::streamsize elements )
{
#ifdef HAVE_MIC
const std::streamsize host_buffer_size = std::min( TransferBufferSize / (std::streamsize) sizeof(Type), elements );
using BaseType = typename std::remove_cv< Type >::type;
std::unique_ptr< BaseType[] > host_buffer{ new BaseType[ host_buffer_size ] };
std::streamsize readElements = 0;
if( std::is_same< Type, SourceType >::value )
{
while( readElements < elements )
{
const std::streamsize transfer = std::min( elements - readElements, host_buffer_size );
file.read( reinterpret_cast<char*>(host_buffer.get()), sizeof(Type) * transfer );
Devices::MICHider<Type> device_buff;
device_buff.pointer=buffer;
#pragma offload target(mic) in(device_buff,readElements) in(host_buffer:length(transfer))
{
/*
for(int i=0;i<transfer;i++)
device_buff.pointer[readElements+i]=host_buffer[i];
*/
memcpy(&(device_buff.pointer[readElements]), host_buffer.get(), transfer*sizeof(Type) );
}