From eb1a157894fea7b8c0df31fffbe2fad8ce13f1c4 Mon Sep 17 00:00:00 2001
From: Lukas Cejka <lukas.ostatek@gmail.com>
Date: Sun, 31 Mar 2019 19:25:45 +0200
Subject: [PATCH] Preliminary templating of warpInfo and warpList. Preliminary
 fix of a bug where computeWarps could loop infinitely while splitting warps.
 Added debug checkpoints. Committing for backup purposes.

---
 src/TNL/Matrices/AdEllpack_impl.h | 186 ++++++++++++++++++++++++------
 1 file changed, 148 insertions(+), 38 deletions(-)

diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h
index fe0205c5f0..37ff9eb4be 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/AdEllpack_impl.h
@@ -21,10 +21,11 @@ namespace Matrices {
 /*
  * Auxiliary list implementation
  */
-warpList::warpList()
+template< typename MatrixType >
+warpList< MatrixType >::warpList()
 {
-    this->head = new warpInfo;
-    this->tail = new warpInfo;
+    this->head = new warpInfo< MatrixType >;
+    this->tail = new warpInfo< MatrixType >;
     this->head->previous = NULL;
     this->head->next = this->tail;
     this->tail->previous = this->head;
@@ -33,12 +34,13 @@ warpList::warpList()
     this->numberOfWarps = 0;
 }
 
-bool warpList::addWarp( const int offset,
-                        const int rowOffset,
-                        const int localLoad,
-                        const int* reduceMap )
+template< typename MatrixType >
+bool warpList< MatrixType >::addWarp( const IndexType offset,
+                                      const IndexType rowOffset,
+                                      const IndexType localLoad,
+                                      const IndexType* reduceMap )
 {
-    warpInfo* temp = new warpInfo();
+    warpInfo< MatrixType >* temp = new warpInfo< MatrixType >();
     if( !temp )
         return false;
     temp->offset = offset;
@@ -56,13 +58,15 @@ bool warpList::addWarp( const int offset,
     return true;
 }
 
-warpInfo* warpList::splitInHalf( warpInfo* warp )
+template< typename MatrixType >
+warpInfo< MatrixType >* warpList< MatrixType >::splitInHalf( warpInfo< MatrixType >* warp )
 {
-    warpInfo* firstHalf = new warpInfo();
-    warpInfo* secondHalf = new warpInfo();
-    int localLoad = ( warp->localLoad / 2 ) + ( warp->localLoad % 2 == 0 ? 0 : 1 );
+    warpInfo< MatrixType >* firstHalf = new warpInfo< MatrixType >();
+    warpInfo< MatrixType >* secondHalf = new warpInfo< MatrixType >();
+    
+    IndexType localLoad = ( warp->localLoad / 2 ) + ( warp->localLoad % 2 == 0 ? 0 : 1 );
 
-    int rowOffset = warp->rowOffset;
+    IndexType rowOffset = warp->rowOffset;
 
     // first half split
     firstHalf->localLoad = localLoad;
@@ -132,11 +136,12 @@ warpInfo* warpList::splitInHalf( warpInfo* warp )
     return firstHalf;
 }
 
-warpList::~warpList()
+template< typename MatrixType >
+warpList< MatrixType >::~warpList()
 {
     while( this->head->next != NULL )
     {
-        warpInfo* temp = new warpInfo;
+        warpInfo< MatrixType >* temp = new warpInfo< MatrixType >;
         temp = this->head->next;
         this->head->next = temp->next;
         delete temp;
@@ -186,30 +191,48 @@ void
 AdEllpack< Real, Device, Index >::
 setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
 {
+    std::cout << "\tCompressedRowLengths:" << std::endl;
+    
     TNL_ASSERT( this->getRows() > 0, );
     TNL_ASSERT( this->getColumns() > 0, );
+    
+    std::cout << "\t\tAssert rows and columns > 0." << std::endl;
+    
     if( std::is_same< DeviceType, Devices::Host >::value )
     {
-         RealType average = 0.0;
-         for( IndexType row = 0; row < this->getRows(); row++ )
-            average += rowLengths.getElement( row );
-         average /= ( RealType ) this->getRows();
-         this->totalLoad = average;
-
-        warpList* list = new warpList();
+        RealType average = 0.0;
+        for( IndexType row = 0; row < this->getRows(); row++ )
+           average += rowLengths.getElement( row );
+        average /= ( RealType ) this->getRows();
+        this->totalLoad = average;
+        
+        // TEST
+        std::cout << "\t\tAverage assigned to totalLoad." << std::endl;
+
+        warpList< ThisType >* list = new warpList< ThisType >();
+        
+        // TEST
+        std::cout << "\t\tNew warpList created." << std::endl;
 
         if( !this->balanceLoad( average, rowLengths, list ) )
             throw 0; // TODO: Make better exception
+        
+        // TEST
+        std::cout << "\t\tbalanceLoad exception was not thrown." << std::endl;
 
         IndexType SMs = 15;
         IndexType threadsPerSM = 2048;
 
         this->computeWarps( SMs, threadsPerSM, list );
+        
+        // TEST
+        std::cout << "\t\tWarps computed." << std::endl;
 
         if( !this->createArrays( list ) )
             throw 0; // TODO: Make better excpetion
-
-
+        
+        // TEST
+        std::cout << "\t\tArrays created." << std::endl;
 
         //this->performRowTest();
         //cout << "========================" << std::endl;
@@ -217,7 +240,10 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
         //cout << "========================" << std::endl;
         //this->performRowLengthsTest( rowLengths );
     }
-
+    
+    // TEST
+    std::cout << "\tCompleted host setup." << std::endl;
+    
     if( std::is_same< DeviceType, Devices::Cuda >::value )
     {
         AdEllpack< RealType, Devices::Host, IndexType > hostMatrix;
@@ -235,10 +261,13 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
         this->localLoad = hostMatrix.localLoad;
         this->reduceMap.setLike( hostMatrix.reduceMap );
         this->reduceMap = hostMatrix.reduceMap;
-         this->totalLoad = hostMatrix.getTotalLoad();
+        this->totalLoad = hostMatrix.getTotalLoad();
 
         this->allocateMatrixElements( this->offset.getElement( this->offset.getSize() - 1 ) );
     }
+    
+    // TEST
+    std::cout << "\tCompleted device setup." << std::endl;
 }
 
 template< typename Real,
@@ -686,7 +715,7 @@ template< typename Real,
           typename Index >
 bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
                                                     ConstCompressedRowLengthsVectorView rowLengths,
-                                                    warpList* list )
+                                                    warpList< ThisType >* list )
 {
     IndexType offset, rowOffset, localLoad, reduceMap[ 32 ];
 
@@ -722,7 +751,7 @@ bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
                 for( IndexType i = numberOfThreads + 1; i < this->warpSize; i++ )
                     reduceMap[ i ] = 0;
 
-                if( !list->addWarp( offset, rowOffset, localLoad, (int *)reduceMap ) )
+                if( !list->addWarp( offset, rowOffset, localLoad, reduceMap ) )
                     return false;
 
                 offset += this->warpSize * localLoad;
@@ -752,7 +781,7 @@ bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
                     reduceMap[ i ] = 0;
 
                 // count new offsets, add new warp and reset variables
-                if( !list->addWarp( offset, rowOffset, localLoad, (int *)reduceMap ) )
+                if( !list->addWarp( offset, rowOffset, localLoad, reduceMap ) )
                     return false;
                 offset += this->warpSize * localLoad;
                 rowOffset = row;
@@ -791,7 +820,7 @@ bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
         if( ( ( row == this->getRows() - 1 ) && !addedWarp ) ||
             ( ( row == this->getRows() - 1 ) && ( threadsPerRow == numberOfThreads ) && ( numberOfThreads > 0 ) ) )
         {
-            list->addWarp( offset, rowOffset, localLoad, (int *)reduceMap );
+            list->addWarp( offset, rowOffset, localLoad, reduceMap );
         }
     }
     return true;
@@ -801,44 +830,125 @@ template< typename Real,
           typename Device,
           typename Index >
 void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
-                                                              const IndexType threadsPerSM,
-                                                              warpList* list )
+                                                     const IndexType threadsPerSM,
+                                                     warpList< ThisType >* list )
 {
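+// Included for the debug pause below: system( "read -p 'Press Enter to continue...' var" ) is the Linux counterpart of system("pause"). NOTE(review): system() is declared in <cstdlib>, not <iostream>, and an #include inside a function body is fragile - move it to the top of the file.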
+#include <iostream>
+    std::cout << "\t\tComputeWarps:" << std::endl;
+    
     IndexType averageLoad = 0;
-    warpInfo* temp = list->getHead()->next;
-    while( temp->next != list->getTail() )
+    warpInfo< ThisType >* temp = list->getHead()->next;
+    
+    //TEST
+    list->printList();
+    
+    // MISTAKE? If list looks like this:
+    //
+    //      Head:	i->localLoad = 0	i->offset = 0	i->rowOffset = 0
+    //                  i->localLoad = 1	i->offset = 0	i->rowOffset = 0
+    //      Tail:	i->localLoad = 0	i->offset = 0	i->rowOffset = 0
+    //      
+    //      Then temp starts out as 'Head->next', and 'temp->next' already EQUALS 'list->getTail()',
+    //      so a loop guarded by 'temp->next != list->getTail()' never runs and averageLoad is never accumulated.
+    while( temp/*->next*/ != list->getTail() )
     {
         averageLoad += temp->localLoad;
         temp = temp->next;
     }
-    averageLoad /= list->getNumberOfWarps();
+    
+    // MISTAKE? If the summed averageLoad is smaller than the number of warpInfos in the warpList
+    //              (e.g. 1 with more than one warp), integer division truncates averageLoad to 0.
+    //              The inner while loop below (where splitInHalf( temp ) happens) then never terminates.
+    /*averageLoad /= list->getNumberOfWarps();*/
+    
+    // TEST
+    std::cout << "\t\t\tBefore roundUpDivision:" << std::endl;
+    std::cout << "\t\t\t\taverageLoad = " << averageLoad << "\tlist->getNumberOfWarps() = " << list->getNumberOfWarps() << std::endl;
+    
+    // TEST
+    averageLoad = roundUpDivision( averageLoad, list->getNumberOfWarps() );
+    
+    // TEST
+    std::cout << "\t\t\tAverage load calculated. = " << averageLoad << std::endl;
 
     IndexType totalWarps = SMs * ( threadsPerSM / this->warpSize );
     IndexType remainingThreads = list->getNumberOfWarps();
     bool warpsToSplit = true;
+    
+    // TEST
+    std::cout << "\t\t\tTotal warps, remaining threads, warpsToSplit set." << std::endl;
 
     while( remainingThreads < ( totalWarps / 2 ) && warpsToSplit )
     {
+        // TEST
+        std::cout << "\t\t\tBeginning of outer while." << std::endl;
+        
         warpsToSplit = false;
         temp = list->getHead()->next;
+        
+        // TEST - PRINT
+        std::cout << "\t\t\t\t[ list PRINT ]: " << std::endl;
+        list->printList();
+        
+        // FIXME: This can be an INFINITE LOOP.
+        //        It will cause the process to be killed by bash.
         while( temp != list->getTail() )
         {
+            // TEST
+            std::cout << "\n\t\t\t\tBeginning of inner while." << std::endl;
+            std::cout << "\t\t\t\ttemp->localLoad = " << temp->localLoad << "\ttemp->offset = " << temp->offset << "\ttemp->rowOffset = " << temp->rowOffset << std::endl;
+            
+            // FIXME: localLoad of newly created secondHalf from splitInHalf is always at least 1.
+            //          If averageLoad is 0, then this will create new warpInfos until the system memory is depleted.
             if( temp->localLoad > averageLoad )
             {
                 temp = list->splitInHalf( temp );
                 warpsToSplit = true;
-		
+                
+                // TEST - PRINT after splitInHalf
+                std::cout << "\t\t\t\t[ list PRINT - after splitInHalf ]: " << std::endl;
+                list->printList();
+                
+                // TEST
+                std::cout << "\n\t\t\t\t\ttemp after splitInHalf:" << std::endl;
+                std::cout << "\t\t\t\t\ttemp->localLoad = " << temp->localLoad << "\ttemp->offset = " << temp->offset << "\ttemp->rowOffset = " << temp->rowOffset << std::endl;
+                
+                // TEST
+                if( temp == list->getHead()->next )
+                    std::cout << "\n\t\t\t\t\ttemp == list->getHead()->next" << std::endl;
+                
             }
+            
+            // TEST
+            if( temp->next == list->getHead()->next->next )
+                std::cout << "\n\t\t\t\t\ttemp->next == list->getHead()->next->next" << std::endl;
+            
+            // TEST
+            if( list->getHead()->next->next == list->getTail() )
+                std::cout << "\n\t\t\t\t\tlist->getHead()->next->next == list->getTail()" << std::endl;
+            
             temp = temp->next;
+            
+            // TEST
+            std::cout << "\t\t\t\t\ttemp after temp->next:" << std::endl;
+            std::cout << "\t\t\t\t\ttemp->localLoad = " << temp->localLoad << "\ttemp->offset = " << temp->offset << "\ttemp->rowOffset = " << temp->rowOffset << std::endl;
+            
+            // TEST
+            system("read -p 'Press Enter to continue...' var");
         }
 	remainingThreads = list->getNumberOfWarps();
+        
+        // TEST
+        std::cout << "\t\t\tRemaining threads set." << std::endl;
+        
     }
 }
 
 template< typename Real,
           typename Device,
           typename Index >
-bool AdEllpack< Real, Device, Index >::createArrays( warpList* list )
+bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list )
 {
     IndexType length = list->getNumberOfWarps();
 
@@ -848,7 +958,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList* list )
     this->reduceMap.setSize( length * this->warpSize );
 
     IndexType iteration = 0;
-    warpInfo* warp = list->getHead()->next;
+    warpInfo< ThisType >* warp = list->getHead()->next;
     while( warp != list->getTail() )
     {
         this->offset.setElement( iteration, warp->offset );
-- 
GitLab