Deleted useless code, reformatted present code. Still have issues with...
Deleted useless code and reformatted the remaining code. Still have issues with getNonZeroRowLength: getRow() throws a segfault, and so does resultCuda. Committing for backup purposes.
@@ -156,34 +157,31 @@ Index CSR< Real, Device, Index >::getNonZeroRowLength( const IndexType row ) con
}
if(std::is_same<DeviceType,Devices::Cuda>::value)
{
ConstMatrixRowmatrixRow=this->getRow(row);
IndexTyperesultHost;
IndexType*cols=newIndexType[4];
std::cout<<"crash1"<<std::endl;
RealType*vals=newRealType[4];
std::cout<<"crash2"<<std::endl;
for(inti=0;i<4;i++)
{
cols[i]=i;
vals[i]=1.0;
}
std::cout<<"crash3"<<std::endl;
ConstMatrixRowmatrixRow(cols,vals,4,1);// = this->getRow( row ); // If the program even compiles, this line fails because a segfault is thrown on the first line of getRow()
// PROBLEM: If the second parameter of getNonZeroRowLengthCudaKernel is '&resultCuda', the following error is raised:
// 'error: no instance of function template "TNL::Matrices::getNonZeroRowLengthCudaKernel" matches the argument list'
/*TNL::Matrices::*/getNonZeroRowLengthCudaKernel<ConstMatrixRow,IndexType><<<1,1>>>(matrixRow,resultCuda);// matrixRow works fine, tested them both separately
// If this is static, it will trigger an illegal memory access
// How to get it into the lambda function?
NonConstIndexelementCount(0);
// using CudaType = typename TNL::Devices::Cuda;
// using HostType = typename TNL::Devices::Host;
//
//
// // elementCount = 0; // Only if it is static. Make sure it is reset. Without this seemingly useless step, it returned incorrect values.
//
// // PROBLEM: Lambda function with __cuda_callable__ CANNOT pass values by reference!!
// // INCORRECT ASSUMPTION!! PROBLEM: A lambda function which takes in anything via its capture list cannot return anything. (Maybe don't capture anything? Pass this->values by parameter and return the count?)