Commit 303225c3 authored by Jakub Klinkovský
Browse files

Implemented LogParser in Python using the pandas library

parent 120d3573
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3,5 +3,6 @@ find_package( PythonInterp 3 )
if( PYTHONINTERP_FOUND )
   CONFIGURE_FILE( "__init__.py.in" "${PROJECT_BUILD_PATH}/Python/__init__.py" )
   INSTALL( FILES ${PROJECT_BUILD_PATH}/Python/__init__.py
                  LogParser.py
            DESTINATION lib/python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}/site-packages/TNL )
endif()
+192 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import sys
import collections

def getSortKey(value):
    # try to convert to number if possible
try:
        return int(value)
    except ValueError:
        try:
            return float(value)
        except ValueError:
            if value:
                return value
            # None or empty string
            return 0

class columnFormating:

    def __init__( self, data ):
        self.coloring = []
        self.sorting = "none"
        self.sortingFile = ""
        self.sortingData = []
        self.sortingNaNs = 0
        dataSplit = data.split( ' ' )
        currentFormating = ""
        for word in dataSplit:
            if word == "COLORING" or word == "SORT":
                currentFormating = word
                continue
            if currentFormating == "COLORING":
                self.coloring.append( word )
            if currentFormating == "SORT":
                if word == "+" or word == "-":
                    self.sorting = word
                else:
                    self.sortingFile = word


    def get( self, value ):
        color = ""
        if len( self.coloring ) > 0:
            for token in self.coloring:
                if token.find( "#" ) == 0:
                    color = token
                else:
                    try:
                        if float( token ) > float( value ):
                            break
                    except ValueError:
                        color = ""
                        break
    import pandas
    pandas.set_option('display.max_columns', 100)
    pandas.set_option('display.max_rows', 1000)
    pandas.set_option('display.width', 150)
except ImportError:
    raise ImportError("Please make sure that the python3-pandas package is installed.")

        html = ""
        if color != "":
            html += "bgcolor=\"{}\"".format(color)

        if self.sorting == "+" or self.sorting == "-":
            try:
                number = float( value )
                self.sortingData.append( number )
            except ValueError:
                self.sortingNaNs += 1

        return html

    def processSorting( self ):
        if self.sorting == "none":
            return
        if self.sorting == "+":
            self.sortingData.sort()
        if self.sorting == "-":
            self.sortingData.sort( reverse = True )
        sortFile = open( self.sortingFile, "w" )
        sortFile.write( "# Number of NaNs is ")
        sortFile.write( str( self.sortingNaNs ) )
        sortFile.write( "\n\n" )
        idx = 0
        for n in self.sortingData:
            sortFile.write( str( idx ) )
            sortFile.write( "\t" )
            sortFile.write( str( n ) )
            sortFile.write( "\n" )
            idx += 1
        sortFile.close()


class tableColumn:

class TableColumn:
    def __init__(self, level, data, parentPath=None):
        self.subcolumns = []
        self.height = 0
@@ -112,43 +35,10 @@ class tableColumn:

    def insertSubcolumn( self, level, label ):
        if level == self.level + 1:
            self.subcolumns.append( tableColumn( level, label, self.path ) )
            self.subcolumns.append( TableColumn( level, label, self.path ) )
        if level > self.level + 1:
            self.subcolumns[ -1 ].insertSubcolumn( level, label )

    def countSubcolumns( self ):
        if( len( self.subcolumns ) == 0 ):
            self.numberOfSubcolumns = 1
        else:
            self.numberOfSubcolumns = 0;
            for subcolumn in self.subcolumns:
                self.numberOfSubcolumns = self.numberOfSubcolumns + subcolumn.countSubcolumns()
        return self.numberOfSubcolumns

    def countHeight( self ):
        self.height = 1;
        if len( self.subcolumns ) == 0:
            return 1
        for subcolumn in self.subcolumns:
            self.height = max( self.height, subcolumn.countHeight() + 1 )
        return self.height

    def countRowspan( self, height ):
        self.rowspan = height - self.height + 1
        #print "Setting rowspan of ", self.label, " to ", self.rowspan
        for subcolumn in self.subcolumns:
            subcolumn.countRowspan( self.height - 1 )

    def recomputeLevel( self, level ):
        self.level = level
        for subcolumn in self.subcolumns:
            subcolumn.recomputeLevel( self.level + self.rowspan )

    def getColumnHeader( self, currentLevel ):
        if currentLevel == self.level:
            return "        <td rowspan=" + str( self.rowspan ) + " colspan=" + str( self.numberOfSubcolumns) + ">" + self.label + "</td>\n"
        return ""

    def pickLeafColumns( self, leafColumns ):
        if len( self.subcolumns ) == 0:
            leafColumns.append( self )
@@ -156,50 +46,14 @@ class tableColumn:
            for subcolumn in self.subcolumns:
                subcolumn.pickLeafColumns( leafColumns )

    def getFormating( self, value ):
        formating = columnFormating(self.attributes)
        return formating.get( value )

    def processSorting( self ):
        self.formating.processSorting()

    def __repr__(self):
        return "<tableColumn(label={}, subcolumns={})>".format(self.label, [col.label for col in self.subcolumns])

        return "<TableColumn(label={}, subcolumns={})>".format(self.label, [col.label for col in self.subcolumns])


class logToHtmlConvertor:

    def __init__(self):
        self.html = ""
        self.reset()

    def reset(self):
        self.metadata = {}
        self.maxLevel = 0
        self.leafColumns = []
        self.tableColumns = collections.OrderedDict()
        self.tableRows = []

    def processFile( self, logFileName, htmlFileName ):
        # init HTML text
        self.writeHtmlHeader()

        print("Processing file", logFileName)
class LogParser:
    def readFile(self, logFileName):
        logFile = open(logFileName, 'r')
        self.readFile(logFile)
        logFile.close()

        self.writeHtmlFooter()
        print("Writing output to", htmlFileName)
        htmlFile = open( htmlFileName, 'w' )
        htmlFile.write(self.html)
        htmlFile.close()

        self.reset()
        self.html = ""

    def readFile( self, logFile ):
        # read file by lines
        lines = logFile.readlines()

@@ -211,36 +65,42 @@ class logToHtmlConvertor:
            lines.pop(0)

        while len(lines) > 0:
            self.reset()
            metadata = []
            while len(lines) > 0 and lines[0].startswith(":"):
                metadata.append(lines.pop(0))
            self.parseMetadata(metadata)
            metadata = self.parseMetadata(metadata)

            table = []
            while len(lines) > 0 and not lines[0].startswith(":"):
                table.append(lines.pop(0))
            self.parseTable(table)
            tableColumns, tableRows = self.parseTable(table)

            df = self.getDataframe(tableColumns, tableRows)
            df = df.sort_index()

            self.writeTable()
            yield metadata, df

    def parseMetadata(self, lines):
    @staticmethod
    def parseMetadata(lines):
        metadata = {}
        for line in lines:
            line = line[1:]
            key, value = line.split("=", 1)
            self.metadata[key.strip()] = value.strip()
            metadata[key.strip()] = value.strip()
        return metadata

    def parseTable(self, lines):
        header = []
        body = []
        self.tableColumns = collections.OrderedDict()
        rows = []
        while len(lines) > 0:
            header = []
            while len(lines) > 0 and lines[0].startswith("!"):
                header.append(lines.pop(0))
            body = []
            while len(lines) > 0 and not lines[0].startswith("!"):
                body.append(lines.pop(0))
            self.parseTableRow(header, body)
            header = []
            body = []
            rows.append(self.parseTableRow(header, body))
        return self.tableColumns, rows

    def parseTableRow(self, header, body):
        columns = []
@@ -248,11 +108,8 @@ class logToHtmlConvertor:
            data = line.lstrip("!")
            level = len(line) - len(data)
            label = data.strip()
            #print " Inserting column on level ", level, " and label ", label
            if level > self.maxLevel:
                self.maxLevel = level;
            if level == 1:
                columns.append( tableColumn( 1, label ) )
                columns.append( TableColumn( 1, label ) )
            if level > 1:
                columns[ -1 ].insertSubcolumn( level, label )

@@ -272,7 +129,7 @@ class logToHtmlConvertor:
        for element, column in zip(elements, leafColumns):
            path = tuple(column.path)
            row[path] = element
        self.tableRows.append(row)
        return row

    def pickLeafColumns(self, columns):
        leafColumns = []
@@ -295,120 +152,41 @@ class logToHtmlConvertor:
                self.tableColumns[path] = col
            self.mergeColumns(col.subcolumns)

    def mergeRows(self):
        # sort table
        self.tableRows.sort(key=lambda row: list(row.values()))

        i = 0
        while i < len(self.tableRows) - 1:
            currentRow = self.tableRows[ i ]
            nextRow = self.tableRows[ i + 1 ]

            can_merge = True
            for key, value in nextRow.items():
                if key in currentRow and currentRow[key] != value:
                    can_merge = False
                    break
            if can_merge is True:
                currentRow.update(nextRow)
                self.tableRows.pop(i + 1)
            else:
                i += 1

        # TODO: check this
        # sort again (just in case, previous sorting might compare values from
        # different columns)
        self.tableRows.sort(key=lambda row: [getSortKey(value) for value in row.values()])

    def countSubcolumns( self ):
        for path, col in self.tableColumns.items():
            if len(path) == 1:
                col.countSubcolumns();

    def countHeight( self ):
        for path, col in self.tableColumns.items():
            if len(path) == 1:
                col.countHeight();

    def countRowspan( self ):
        for path, col in self.tableColumns.items():
            if len(path) == 1:
                col.countRowspan( self.maxLevel )

    def recomputeLevel( self ):
        for path, col in self.tableColumns.items():
            if len(path) == 1:
                col.recomputeLevel( 1 )

    def processSorting(self):
        for path, col in self.tableColumns.items():
            if len(path) == 1:
                col.processSorting()

    def writeTable(self):
        self.mergeRows()
        self.countSubcolumns()
        self.countHeight()
        self.countRowspan()
        self.recomputeLevel()
#        self.processSorting()

        # write metadata
        self.writeMetadata()

        self.html += "<table border=1>\n"

        # write header
        self.writeColumnsHeader()

        # write data
        firstLevelColumns = [column for path, column in self.tableColumns.items() if len(path) == 1]
        leafColumns = self.pickLeafColumns(firstLevelColumns)
        for row in self.tableRows:
            self.html += "    <tr>\n"
            # walk through leafColumns to ensure correct order
            for col in leafColumns:
                path = tuple(col.path)
                if path in row:
                    value = row[path]
                    formating = col.getFormating(value)
                    self.html += "        <td {}>{}</td>\n".format(formating, value)
    @staticmethod
    def getDataframe(tableColumns, tableRows):
        # names of the index and data columns
        index_names = [k[0] for k, v in tableColumns.items() if not v.subcolumns and len(k) == 1]
        column_names = [k for k, v in tableColumns.items() if not v.subcolumns and len(k) > 1]

        values = collections.OrderedDict()
        for row in tableRows:
            # split row into index and data columns
            idx_itms = {}
            col_val = {}
            for k, v in row.items():
                if len(k) == 1 and k[0] in index_names:
                    idx_itms[k[0]] = v
                else:
                    self.html += "        <td></td>\n"
            self.html += "    </tr>\n"

        self.html += "</table>\n"

    def writeMetadata(self):
        self.html += "<h2>{}</h2>\n".format(self.metadata.get("title"))
        self.html += "<table border=1>\n"
        self.html += "<tbody>\n"
        for key in sorted(self.metadata.keys()):
            self.html += "    <tr><td>{}</td><td>{}</td></tr>\n".format(key, self.metadata[key])
        self.html += "</tbody>\n"
        self.html += "</table>\n"

    def writeColumnsHeader(self):
        level = 1
        while level <= self.maxLevel:
            self.html += "    <tr>\n"
            for path, column in self.tableColumns.items():
                self.html += column.getColumnHeader( level )
            self.html += "    </tr>\n"
            level += 1

    def writeHtmlHeader(self):
        self.html += "<html>\n"
        self.html += "<body>\n"

    def writeHtmlFooter(self):
        self.html += "</body>\n"
        self.html += "</html>\n"



arguments = sys.argv[ 1: ]
logFile = arguments[ 0 ]
htmlFile = arguments[ 1 ]
logConvertor = logToHtmlConvertor()
logConvertor.processFile( logFile, htmlFile )
                    col_val[k] = v

            # construct the index tuple
            idx = []
            for i in index_names:
                idx.append(idx_itms[i])
            idx = tuple(idx)

            # record the values
            values.setdefault(idx, {})
            values[idx].update(col_val)

        # create empty dataframe
        columns = pandas.MultiIndex.from_tuples(column_names)
        index = pandas.MultiIndex.from_tuples(values.keys(), names=index_names)
        df = pandas.DataFrame(index=index, columns=columns)

        # add data to the dataframe
        for idx, d in values.items():
            for col, val in d.items():
                df.loc[idx, col] = val

        return df
+1 −0
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@ INSTALL( FILES ${PROJECT_TOOLS_PATH}/tnl-init${debugExt}
               tnl-time-series2png
               tnl-err2eoc
               tnl-eoc-test-log
               tnl-log-to-html.py
               ${PROJECT_TOOLS_PATH}/tnl-compile 
               ${PROJECT_TOOLS_PATH}/tnl-link
               ${PROJECT_TOOLS_PATH}/tnl-bindir
+45 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import sys

from TNL.LogParser import LogParser

def metadata_to_html(metadata):
    """Render a metadata mapping as an HTML heading followed by a key/value table.

    The heading shows the value stored under the ``"title"`` key (empty when
    there is no such key); the table then lists every key/value pair, one row
    per key, in sorted key order.

    Keys and values are HTML-escaped, so characters such as ``<``, ``>`` and
    ``&`` are displayed literally instead of corrupting the generated markup.

    :param metadata: mapping of metadata keys to values.
    :return: the HTML fragment as a single string.
    """
    # Import only ``escape`` and do it locally: ``html`` is also used as a
    # plain variable name in this script, so a module-level ``import html``
    # would be shadowed there.
    from html import escape

    def text(value):
        # quote=False: the text only ever lands in element content, never in
        # an attribute, so quotes can stay readable.
        return escape(str(value), quote=False)

    parts = [
        "<h2>{}</h2>\n".format(text(metadata.get("title", ""))),
        "<table border=1>\n",
        "<tbody>\n",
    ]
    for key in sorted(metadata):
        parts.append("    <tr><td>{}</td><td>{}</td></tr>\n".format(text(key), text(metadata[key])))
    parts.append("</tbody>\n")
    parts.append("</table>\n")
    return "".join(parts)

def convertLogToHtml(logFileName, htmlFileName):
    """Convert a log file into a single HTML document.

    ``LogParser.readFile`` is iterated for ``(metadata, df)`` pairs; for each
    pair the metadata table (``metadata_to_html``) and the ``df.to_html()``
    rendering are appended to the document body, in the order they are
    produced.

    :param logFileName: path of the log file to read.
    :param htmlFileName: path of the HTML file to write (overwritten).
    """
    parser = LogParser()

    print("Processing file", logFileName)
    # Collect the pieces and join once instead of growing a string with +=.
    parts = ["<html>\n", "<body>\n"]
    for metadata, df in parser.readFile(logFileName):
        parts.append(metadata_to_html(metadata))
        parts.append(df.to_html())
    parts.append("</body>\n")
    parts.append("</html>\n")

    print("Writing output to", htmlFileName)
    # The context manager closes the file even if the write fails.
    with open(htmlFileName, 'w') as htmlFile:
        htmlFile.write("".join(parts))


def _default_html_name(logFileName):
    """Return ``logFileName`` with its file extension replaced by ``.html``.

    Only the extension of the last path component is replaced, so a dot in a
    directory name (``results.d/run``) or a leading dot (``.hidden``) does not
    truncate the path.
    """
    import os.path

    return os.path.splitext(logFileName)[0] + ".html"


def main(arguments):
    """Command-line entry point.

    :param arguments: command-line arguments without the program name:
        the log file, optionally followed by the output HTML file. When the
        output file is omitted, it is derived from the log file name.
    :return: process exit status (0 on success, 1 on a usage error).
    """
    if not arguments:
        print("usage: tnl-log-to-html.py <log file> [<html file>]", file=sys.stderr)
        return 1

    logFile = arguments[0]
    if len(arguments) > 1:
        htmlFile = arguments[1]
    else:
        htmlFile = _default_html_name(logFile)

    convertLogToHtml(logFile, htmlFile)
    return 0


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
+0 −3
Original line number Diff line number Diff line
@@ -9,11 +9,8 @@ INSTALL( FILES matrix-market
               tnl-run-heat-equation-benchmark
               cuda-profiler.conf
               process-cuda-profile.pl 
               tnl-log-to-html.py
               DESTINATION share/tnl-${tnlVersion}/benchmark-scripts )

INSTALL( FILES tnl-run-spmv-benchmark
               tnl-log-to-html.py
         DESTINATION bin
         PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
                                                                   
 No newline at end of file
Loading