Skip to content
Snippets Groups Projects
Commit 30ecfefa authored by Lukas Cejka's avatar Lukas Cejka
Browse files

Moved getNonZeroRowLengthCudaKernal to SparseRow_impl.h and indentified errors...

Moved getNonZeroRowLengthCudaKernal to SparseRow_impl.h and indentified errors in SparseRow_impl.h . Commiting for backup purposes.
parent bd294176
No related branches found
No related tags found
1 merge request!16Matrices
...@@ -131,20 +131,6 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const ...@@ -131,20 +131,6 @@ Index CSR< Real, Device, Index >::getRowLengthFast( const IndexType row ) const
return this->rowPointers[ row + 1 ] - this->rowPointers[ row ]; return this->rowPointers[ row + 1 ] - this->rowPointers[ row ];
} }
#ifdef HAVE_CUDA
// TODO: move to SparseRow
template< typename MatrixRow, typename Index >
__global__
void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result )
{
int threadId = blockIdx.x * blockDim.x + threadIdx.x;
if( threadId == 0 )
{
*result = row.getNonZeroElementsCount();
}
}
#endif
template< typename Real, template< typename Real,
typename Device, typename Device,
typename Index > typename Index >
...@@ -158,27 +144,29 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con ...@@ -158,27 +144,29 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con
if( std::is_same< DeviceType, Devices::Cuda >::value ) if( std::is_same< DeviceType, Devices::Cuda >::value )
{ {
IndexType *cols = new IndexType[4]; IndexType *cols = new IndexType[4];
std::cout << "crash1" << std::endl;
RealType *vals = new RealType[4]; RealType *vals = new RealType[4];
std::cout << "crash2" << std::endl;
for( int i = 0; i < 4; i++ ) for( int i = 0; i < 4; i++ )
{ {
cols[i] = i; cols[i] = i;
vals[i] = 1.0; vals[i] = 1.0;
} }
std::cout << "crash3" << std::endl; ConstMatrixRow matrixRow(cols, vals, 4, 1);
ConstMatrixRow matrixRow(cols, vals, 4, 1); // = this->getRow( row ); // If the program even compiles, this line fails because a segfault is thrown on the first line of getRow() // ConstMatrixRow matrixRow = this->getRow( row );// If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
std::cout << "crash4" << std::endl; // WHEN debugging with GDB:
// (gdb) p this->rowPointers[0]
// Could not find operator[].
// (gdb) p rowPointers.getElement(0)
// Attempt to take address of value not located in memory.
IndexType resultHost ( 0 ); IndexType resultHost ( 0 );
IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost ); IndexType* resultCuda = Devices::Cuda::passToDevice( resultHost );
std::cout << "resultCuda = " << resultCuda << std::endl; // PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
// PROBLEM: If thee second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following issue is thrown:
// 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list' // 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
/*TNL::Matrices::*/getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately TNL::Matrices::getNonZeroRowLengthCudaKernel< ConstMatrixRow, IndexType ><<< 1, 1 >>>( matrixRow, resultCuda ); // matrixRow works fine, tested them both separately
std::cout << "resultCuda = " << resultCuda << std::endl; delete []cols;
std::cout << "crash5" << std::endl; delete []vals;
resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address. std::cout << "Checkpoint BEFORE passFromDevice" << std::endl;
std::cout << "crash6" << std::endl; resultHost = Devices::Cuda::passFromDevice( resultCuda ); // This causes a crash: Illegal memory address in Cuda_impl.h at TNL_CHECK_CUDA_DEVICE
std::cout << "Checkpoint AFTER passFromDevice" << std::endl;
Devices::Cuda::freeFromDevice( resultCuda ); Devices::Cuda::freeFromDevice( resultCuda );
return resultHost; return resultHost;
} }
......
...@@ -112,16 +112,18 @@ getLength() const ...@@ -112,16 +112,18 @@ getLength() const
return length; return length;
} }
//template< typename MatrixRow > #ifdef HAVE_CUDA
//__global__ template< typename MatrixRow, typename Index >
//void getNonZeroRowLengthCudaKernel( const MatrixRow row, typename MatrixRow::IndexType* result ) __global__
//{ void getNonZeroRowLengthCudaKernel( const MatrixRow row, Index* result )
// int threadId = blockIdx.x * blockDim.x + threadIdx.x; {
// if( threadId == 0 ) int threadId = blockIdx.x * blockDim.x + threadIdx.x;
// { if( threadId == 0 )
// result = row->getNonZeroElementsCount(); {
// } *result = row.getNonZeroElementsCount();
//} }
}
#endif
template< typename Real, typename Index > template< typename Real, typename Index >
__cuda_callable__ __cuda_callable__
...@@ -140,7 +142,7 @@ getNonZeroElementsCount() const ...@@ -140,7 +142,7 @@ getNonZeroElementsCount() const
elementCount++; elementCount++;
} }
// std::cout << "Element Count = " << elementCount << "\n"; // std::cout << "Element Count = " << elementCount << "\n";
return elementCount; return elementCount;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment