Implemented LogParser in Python using the pandas library (303225c3) · Commits · TNL / tnl-dev

src/Python/CMakeLists.txt

+1 −0

Original line number	Diff line number	Diff line
		@@ -3,5 +3,6 @@ find_package( PythonInterp 3 )
		if( PYTHONINTERP_FOUND )
		CONFIGURE_FILE( "__init__.py.in" "${PROJECT_BUILD_PATH}/Python/__init__.py" )
		INSTALL( FILES ${PROJECT_BUILD_PATH}/Python/__init__.py
		LogParser.py
		DESTINATION lib/python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}/site-packages/TNL )
		endif()

tests/benchmarks/share/tnl-log-to-html.py→src/Python/LogParser.py

+192 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3

		import sys
		import collections

		def getSortKey(value):
		    """Return a key usable for sorting a table cell value.

		    The value is converted to int if possible, otherwise to float. If
		    neither conversion works, the value itself is returned when it is
		    truthy, and 0 is returned for None or an empty string.
		    """
		    # int is tried first so that integral strings do not become floats
		    for convert in (int, float):
		        try:
		            return convert(value)
		        except (TypeError, ValueError):
		            # TypeError covers None, which int() and float() reject
		            # with TypeError rather than ValueError
		            continue
		    if value:
		        return value
		    # None or empty string
		    return 0

		class columnFormating:

		def __init__( self, data ):
		self.coloring = []
		self.sorting = "none"
		self.sortingFile = ""
		self.sortingData = []
		self.sortingNaNs = 0
		dataSplit = data.split( ' ' )
		currentFormating = ""
		for word in dataSplit:
		if word == "COLORING" or word == "SORT":
		currentFormating = word
		continue
		if currentFormating == "COLORING":
		self.coloring.append( word )
		if currentFormating == "SORT":
		if word == "+" or word == "-":
		self.sorting = word
		else:
		self.sortingFile = word


		def get( self, value ):
		color = ""
		if len( self.coloring ) > 0:
		for token in self.coloring:
		if token.find( "#" ) == 0:
		color = token
		else:
		try:
		if float( token ) > float( value ):
		break
		except ValueError:
		color = ""
		break
		import pandas
		pandas.set_option('display.max_columns', 100)
		pandas.set_option('display.max_rows', 1000)
		pandas.set_option('display.width', 150)
		except ImportError:
		raise ImportError("Please make sure that the python3-pandas package is installed.")

		html = ""
		if color != "":
		html += "bgcolor=\"{}\"".format(color)

		if self.sorting == "+" or self.sorting == "-":
		try:
		number = float( value )
		self.sortingData.append( number )
		except ValueError:
		self.sortingNaNs += 1

		return html

		def processSorting( self ):
		    """Sort the collected numeric values and write them to the sorting file.

		    Does nothing when self.sorting is "none". Otherwise self.sortingData
		    is sorted in place (ascending for "+", descending for "-") and
		    written to self.sortingFile: a header line with the number of NaNs,
		    an empty line, then one "index<TAB>value" line per value.
		    """
		    if self.sorting == "none":
		        return
		    if self.sorting in ( "+", "-" ):
		        self.sortingData.sort( reverse = ( self.sorting == "-" ) )
		    # the with-block closes the file even if one of the writes fails
		    with open( self.sortingFile, "w" ) as sortFile:
		        sortFile.write( "# Number of NaNs is {}\n\n".format( self.sortingNaNs ) )
		        sortFile.writelines(
		            "{}\t{}\n".format( idx, n ) for idx, n in enumerate( self.sortingData ) )


		class tableColumn:

		class TableColumn:
		def __init__(self, level, data, parentPath=None):
		self.subcolumns = []
		self.height = 0
		@@ -112,43 +35,10 @@ class tableColumn:

		def insertSubcolumn( self, level, label ):
		if level == self.level + 1:
		self.subcolumns.append( tableColumn( level, label, self.path ) )
		self.subcolumns.append( TableColumn( level, label, self.path ) )
		if level > self.level + 1:
		self.subcolumns[ -1 ].insertSubcolumn( level, label )

		def countSubcolumns( self ):
		if( len( self.subcolumns ) == 0 ):
		self.numberOfSubcolumns = 1
		else:
		self.numberOfSubcolumns = 0;
		for subcolumn in self.subcolumns:
		self.numberOfSubcolumns = self.numberOfSubcolumns + subcolumn.countSubcolumns()
		return self.numberOfSubcolumns

		def countHeight( self ):
		self.height = 1;
		if len( self.subcolumns ) == 0:
		return 1
		for subcolumn in self.subcolumns:
		self.height = max( self.height, subcolumn.countHeight() + 1 )
		return self.height

		def countRowspan( self, height ):
		self.rowspan = height - self.height + 1
		#print "Setting rowspan of ", self.label, " to ", self.rowspan
		for subcolumn in self.subcolumns:
		subcolumn.countRowspan( self.height - 1 )

		def recomputeLevel( self, level ):
		self.level = level
		for subcolumn in self.subcolumns:
		subcolumn.recomputeLevel( self.level + self.rowspan )

		def getColumnHeader( self, currentLevel ):
		if currentLevel == self.level:
		return " <td rowspan=" + str( self.rowspan ) + " colspan=" + str( self.numberOfSubcolumns) + ">" + self.label + "</td>\n"
		return ""

		def pickLeafColumns( self, leafColumns ):
		if len( self.subcolumns ) == 0:
		leafColumns.append( self )
		@@ -156,50 +46,14 @@ class tableColumn:
		for subcolumn in self.subcolumns:
		subcolumn.pickLeafColumns( leafColumns )

		def getFormating( self, value ):
		formating = columnFormating(self.attributes)
		return formating.get( value )

		def processSorting( self ):
		self.formating.processSorting()

		def __repr__(self):
		return "<tableColumn(label={}, subcolumns={})>".format(self.label, [col.label for col in self.subcolumns])

		return "<TableColumn(label={}, subcolumns={})>".format(self.label, [col.label for col in self.subcolumns])


		class logToHtmlConvertor:

		def __init__(self):
		self.html = ""
		self.reset()

		def reset(self):
		self.metadata = {}
		self.maxLevel = 0
		self.leafColumns = []
		self.tableColumns = collections.OrderedDict()
		self.tableRows = []

		def processFile( self, logFileName, htmlFileName ):
		# init HTML text
		self.writeHtmlHeader()

		print("Processing file", logFileName)
		class LogParser:
		def readFile(self, logFileName):
		logFile = open(logFileName, 'r')
		self.readFile(logFile)
		logFile.close()

		self.writeHtmlFooter()
		print("Writing output to", htmlFileName)
		htmlFile = open( htmlFileName, 'w' )
		htmlFile.write(self.html)
		htmlFile.close()

		self.reset()
		self.html = ""

		def readFile( self, logFile ):
		# read file by lines
		lines = logFile.readlines()

		@@ -211,36 +65,42 @@ class logToHtmlConvertor:
		lines.pop(0)

		while len(lines) > 0:
		self.reset()
		metadata = []
		while len(lines) > 0 and lines[0].startswith(":"):
		metadata.append(lines.pop(0))
		self.parseMetadata(metadata)
		metadata = self.parseMetadata(metadata)

		table = []
		while len(lines) > 0 and not lines[0].startswith(":"):
		table.append(lines.pop(0))
		self.parseTable(table)
		tableColumns, tableRows = self.parseTable(table)

		df = self.getDataframe(tableColumns, tableRows)
		df = df.sort_index()

		self.writeTable()
		yield metadata, df

		def parseMetadata(self, lines):
		@staticmethod
		def parseMetadata(lines):
		metadata = {}
		for line in lines:
		line = line[1:]
		key, value = line.split("=", 1)
		self.metadata[key.strip()] = value.strip()
		metadata[key.strip()] = value.strip()
		return metadata

		def parseTable(self, lines):
		header = []
		body = []
		self.tableColumns = collections.OrderedDict()
		rows = []
		while len(lines) > 0:
		header = []
		while len(lines) > 0 and lines[0].startswith("!"):
		header.append(lines.pop(0))
		body = []
		while len(lines) > 0 and not lines[0].startswith("!"):
		body.append(lines.pop(0))
		self.parseTableRow(header, body)
		header = []
		body = []
		rows.append(self.parseTableRow(header, body))
		return self.tableColumns, rows

		def parseTableRow(self, header, body):
		columns = []
		@@ -248,11 +108,8 @@ class logToHtmlConvertor:
		data = line.lstrip("!")
		level = len(line) - len(data)
		label = data.strip()
		#print " Inserting column on level ", level, " and label ", label
		if level > self.maxLevel:
		self.maxLevel = level;
		if level == 1:
		columns.append( tableColumn( 1, label ) )
		columns.append( TableColumn( 1, label ) )
		if level > 1:
		columns[ -1 ].insertSubcolumn( level, label )

		@@ -272,7 +129,7 @@ class logToHtmlConvertor:
		for element, column in zip(elements, leafColumns):
		path = tuple(column.path)
		row[path] = element
		self.tableRows.append(row)
		return row

		def pickLeafColumns(self, columns):
		leafColumns = []
		@@ -295,120 +152,41 @@ class logToHtmlConvertor:
		self.tableColumns[path] = col
		self.mergeColumns(col.subcolumns)

		def mergeRows(self):
		# sort table
		self.tableRows.sort(key=lambda row: list(row.values()))

		i = 0
		while i < len(self.tableRows) - 1:
		currentRow = self.tableRows[ i ]
		nextRow = self.tableRows[ i + 1 ]

		can_merge = True
		for key, value in nextRow.items():
		if key in currentRow and currentRow[key] != value:
		can_merge = False
		break
		if can_merge is True:
		currentRow.update(nextRow)
		self.tableRows.pop(i + 1)
		else:
		i += 1

		# TODO: check this
		# sort again (just in case, previous sorting might compare values from
		# different columns)
		self.tableRows.sort(key=lambda row: [getSortKey(value) for value in row.values()])

		def countSubcolumns( self ):
		for path, col in self.tableColumns.items():
		if len(path) == 1:
		col.countSubcolumns();

		def countHeight( self ):
		for path, col in self.tableColumns.items():
		if len(path) == 1:
		col.countHeight();

		def countRowspan( self ):
		for path, col in self.tableColumns.items():
		if len(path) == 1:
		col.countRowspan( self.maxLevel )

		def recomputeLevel( self ):
		for path, col in self.tableColumns.items():
		if len(path) == 1:
		col.recomputeLevel( 1 )

		def processSorting(self):
		for path, col in self.tableColumns.items():
		if len(path) == 1:
		col.processSorting()

		def writeTable(self):
		self.mergeRows()
		self.countSubcolumns()
		self.countHeight()
		self.countRowspan()
		self.recomputeLevel()
		# self.processSorting()

		# write metadata
		self.writeMetadata()

		self.html += "<table border=1>\n"

		# write header
		self.writeColumnsHeader()

		# write data
		firstLevelColumns = [column for path, column in self.tableColumns.items() if len(path) == 1]
		leafColumns = self.pickLeafColumns(firstLevelColumns)
		for row in self.tableRows:
		self.html += " <tr>\n"
		# walk through leafColumns to ensure correct order
		for col in leafColumns:
		path = tuple(col.path)
		if path in row:
		value = row[path]
		formating = col.getFormating(value)
		self.html += " <td {}>{}</td>\n".format(formating, value)
		@staticmethod
		def getDataframe(tableColumns, tableRows):
		# names of the index and data columns
		index_names = [k[0] for k, v in tableColumns.items() if not v.subcolumns and len(k) == 1]
		column_names = [k for k, v in tableColumns.items() if not v.subcolumns and len(k) > 1]

		values = collections.OrderedDict()
		for row in tableRows:
		# split row into index and data columns
		idx_itms = {}
		col_val = {}
		for k, v in row.items():
		if len(k) == 1 and k[0] in index_names:
		idx_itms[k[0]] = v
		else:
		self.html += " <td></td>\n"
		self.html += " </tr>\n"

		self.html += "</table>\n"

		def writeMetadata(self):
		self.html += "<h2>{}</h2>\n".format(self.metadata.get("title"))
		self.html += "<table border=1>\n"
		self.html += "<tbody>\n"
		for key in sorted(self.metadata.keys()):
		self.html += " <tr><td>{}</td><td>{}</td></tr>\n".format(key, self.metadata[key])
		self.html += "</tbody>\n"
		self.html += "</table>\n"

		def writeColumnsHeader(self):
		level = 1
		while level <= self.maxLevel:
		self.html += " <tr>\n"
		for path, column in self.tableColumns.items():
		self.html += column.getColumnHeader( level )
		self.html += " </tr>\n"
		level += 1

		def writeHtmlHeader(self):
		self.html += "<html>\n"
		self.html += "<body>\n"

		def writeHtmlFooter(self):
		self.html += "</body>\n"
		self.html += "</html>\n"



		arguments = sys.argv[ 1: ]
		logFile = arguments[ 0 ]
		htmlFile = arguments[ 1 ]
		logConvertor = logToHtmlConvertor()
		logConvertor.processFile( logFile, htmlFile )
		col_val[k] = v

		# construct the index tuple
		idx = []
		for i in index_names:
		idx.append(idx_itms[i])
		idx = tuple(idx)

		# record the values
		values.setdefault(idx, {})
		values[idx].update(col_val)

		# create empty dataframe
		columns = pandas.MultiIndex.from_tuples(column_names)
		index = pandas.MultiIndex.from_tuples(values.keys(), names=index_names)
		df = pandas.DataFrame(index=index, columns=columns)

		# add data to the dataframe
		for idx, d in values.items():
		for col, val in d.items():
		df.loc[idx, col] = val

		return df

src/Tools/CMakeLists.txt

+1 −0

Original line number	Diff line number	Diff line
		@@ -49,6 +49,7 @@ INSTALL( FILES ${PROJECT_TOOLS_PATH}/tnl-init${debugExt}
		tnl-time-series2png
		tnl-err2eoc
		tnl-eoc-test-log
		tnl-log-to-html.py
		${PROJECT_TOOLS_PATH}/tnl-compile
		${PROJECT_TOOLS_PATH}/tnl-link
		${PROJECT_TOOLS_PATH}/tnl-bindir

src/Tools/tnl-log-to-html.py

0 → 100755

+45 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3

		import sys

		from TNL.LogParser import LogParser

		def metadata_to_html(metadata):
		    """Render a metadata dictionary as an HTML heading followed by a table.

		    The heading shows the value stored under the "title" key (the text
		    "None" when the key is missing). The table has one row per key, in
		    sorted key order, with the key and its value in two cells.

		    Keys and values are HTML-escaped, so characters such as "<" and "&"
		    coming from the log file cannot break the generated markup.
		    """
		    # imported locally under its own name because the module name "html"
		    # is used as a variable name elsewhere in this script
		    from html import escape

		    parts = [
		        "<h2>{}</h2>\n".format(escape(str(metadata.get("title")))),
		        "<table border=1>\n",
		        "<tbody>\n",
		    ]
		    for key in sorted(metadata):
		        parts.append(" <tr><td>{}</td><td>{}</td></tr>\n".format(
		            escape(str(key)), escape(str(metadata[key]))))
		    parts.append("</tbody>\n")
		    parts.append("</table>\n")
		    return "".join(parts)

		def convertLogToHtml(logFileName, htmlFileName):
		    """Convert a log file into a single HTML document.

		    Every (metadata, df) pair yielded by LogParser.readFile(logFileName)
		    contributes the output of metadata_to_html(metadata) followed by
		    df.to_html(). The result is wrapped in <html><body> tags and written
		    to htmlFileName, replacing any existing file.
		    """
		    parser = LogParser()

		    print("Processing file", logFileName)
		    # collect the fragments and join once instead of growing a string
		    parts = ["<html>\n", "<body>\n"]
		    for metadata, df in parser.readFile(logFileName):
		        parts.append(metadata_to_html(metadata))
		        parts.append(df.to_html())
		    parts.append("</body>\n")
		    parts.append("</html>\n")

		    print("Writing output to", htmlFileName)
		    # the with-block closes the file even if the write fails
		    with open(htmlFileName, 'w') as htmlFile:
		        htmlFile.write("".join(parts))


		import os


		def defaultHtmlFileName(logFileName):
		    """Return logFileName with its extension replaced by ".html".

		    Only the extension of the last path component is replaced, so a dot
		    in a directory name is left alone; a name without an extension just
		    gets ".html" appended.
		    """
		    return os.path.splitext(logFileName)[0] + ".html"


		def main(arguments):
		    """Convert the log file named by the command-line arguments.

		    arguments[0] is the log file name; the optional arguments[1] is the
		    output HTML file name, which otherwise is derived from the log file
		    name. Exits with a usage message when no log file is given.
		    """
		    if not arguments:
		        sys.exit("usage: tnl-log-to-html.py LOG_FILE [HTML_FILE]")
		    logFile = arguments[ 0 ]
		    if len(arguments) > 1:
		        htmlFile = arguments[ 1 ]
		    else:
		        htmlFile = defaultHtmlFileName(logFile)
		    convertLogToHtml(logFile, htmlFile)


		# run the conversion only when executed as a script, not when imported
		if __name__ == "__main__":
		    main(sys.argv[ 1: ])

tests/benchmarks/share/CMakeLists.txt

+0 −3

Original line number	Diff line number	Diff line
		@@ -9,11 +9,8 @@ INSTALL( FILES matrix-market
		tnl-run-heat-equation-benchmark
		cuda-profiler.conf
		process-cuda-profile.pl
		tnl-log-to-html.py
		DESTINATION share/tnl-${tnlVersion}/benchmark-scripts )

		INSTALL( FILES tnl-run-spmv-benchmark
		tnl-log-to-html.py
		DESTINATION bin
		PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )

		No newline at end of file