Back to the main data prep VBA page
I’ve been experimenting recently with Excel VBA dictionaries and arrays to avoid writing to disk while parsing log files. I have been using a dictionary to cache transactional data, and when the transaction parsing is completed, I write the complete transaction metrics to a time series array. Once the parsing of log files is complete, I’ll write the time series profile out to disk.
[vb]
Sub parsePERFMSG()
‘set variables
”””””””
Dim rawLOG As String, rawLOGpath As String, splitArray() As String, splitArrayDate() As String, splitTime() As String
Dim threadID As String, tempThread As String, tempThreadID As String, tsDAY As Date
Dim lineCt As Double, tsXLDAY As Double, tempVAL As Double
Dim IXNvalues() As Variant
pathFull = Sheets("input").Range("C13")
Set objrawLOG = CreateObject("Scripting.FileSystemObject")
rawLOGpath = objrawLOG.GetFile(pathFull).ParentFolder.path & "\"
Dim ixnDIC As New Dictionary
ixnDIC.Add "date", ""
ixnDIC.Add "time", ""
ixnDIC.Add "thread", ""
ixnDIC.Add "buckets", ""
ixnDIC.Add "candidates", ""
ixnDIC.Add "matched", ""
ixnDIC.Add "MSselect", ""
ixnDIC.Add "MSmatch", ""
ixnDIC.Add "MStot", ""
ixnDIC.Add "type", ""
ixnDIC.Add "user", ""
ixnDIC.Add "rows", ""
Dim ixnTS As New Dictionary
ixnTS.Add "bkts", "1"
ixnTS.Add "cands", "2"
ixnTS.Add "matched", "3"
ixnCOLct = 6
headerLine = "date time,rows in,rows read,bkts,cands,matched,threads"
colHead = 6
colTrd = 1
‘setup clean files
”””””””””
srcCSV = Dir(objrawLOG.GetFile(pathFull).ParentFolder.path & "\")
logCT = 0: logSO = 0
Dim logNameArr(1000, 1) As Variant
Do While srcCSV <> ""
tempFILE = rawLOGpath & "\" & srcCSV & ""
If srcCSV Like "csv*" Or srcCSV Like "THREADgrep*" Or srcCSV = "csvLOGclean.txt" Then
Set delFILE = CreateObject("Scripting.FileSystemObject")
delFILE.Deletefile tempFILE, True
End If
If srcCSV Like "*.dat*" Then
logCT = logCT + 1
logNameArr(logCT, 1) = srcCSV
End If
srcCSV = Dir
Loop
ReDim Preserve IXNvalues(logCT * 1440 + 1, 3 + 1, 5)
fnameLOGclean = "csvLOGclean.txt"
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objCLEAN = objFSO.CreateTextFile(rawLOGpath & "\" & fnameLOGclean, 1)
objCLEAN.WriteLine "date time,host,thread,ctxfree,ixn type,user,buckets,candidates,matched,select ms,match ms,total ms,rows in,rows read"
‘loop through mlg files
”””””””””””’
tsChk = 0
For logLoop = 1 To logCT
rawLOG = logNameArr(logLoop, 1)
srcFullPath = rawLOGpath & rawLOG
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objrawLOG = objFSO.OpenTextFile(rawLOGpath & rawLOG, 1)
Do Until objrawLOG.AtEndOfStream
On Error Resume Next
txnstring = objrawLOG.ReadLine
If Not txnstring Like "*/*" Then GoTo skipIXN
If txnstring Like "*UDP Collector application!" Then GoTo skipIXN
splitArray() = Split(txnstring, "|")
splitArrayDate() = Split(splitArray(1), " ")
ixnDIC.Item("date") = splitArrayDate(0)
ixnDIC.Item("time") = splitArrayDate(1) ‘Right(splitArray(1), Len(splitArray(1)) – 11)
ixnDIC.Item("host") = splitArray(3)
ixnDIC.Item("thread") = splitArray(2)
ixnDIC.Item("type") = splitArray(6)
ixnDIC.Item("user") = splitArray(7)
ixnDIC.Item("buckets") = splitArray(28)
ixnDIC.Item("candidates") = splitArray(29)
ixnDIC.Item("matched") = splitArray(30)
ixnDIC.Item("MSselect") = splitArray(31)
ixnDIC.Item("MSmatch") = splitArray(32)
ixnDIC.Item("MStot") = splitArray(10)
ixnDIC.Item("rowsIN") = splitArray(22)
ixnDIC.Item("rowsOUT") = splitArray(24)
ixnDIC.Item("ctxfree") = splitArray(5)
outputline = ixnDIC.Item("date") & " " & ixnDIC.Item("time")
‘compile minute stats
‘determine row
tsDAY = ixnDIC.Item("date"): tsXLDAY = tsDAY
splitTime() = Split(ixnDIC.Item("time"), ":")
tsHour = splitTime(Val(0))
tsMinute = splitTime(Val(1))
tsTIME = tsXLDAY + tsHour / 24 + tsMinute / 1440
If tsChk = 0 Then
tsStart = tsTIME
tsChk = 1
End If
rowID = Int(tsTIME – tsStart) * 1440 + Hour(tsTIME – tsStart) * 60 + Minute(tsTIME – tsStart) + 2
‘determine if this is a new IXN type, find column
tgtCol = ""
If ixnTS.Exists(ixnDIC.Item("type")) Then
tgtCol = ixnTS.Item(ixnDIC.Item("type"))
pause = 1
Else
ixnCOLct = ixnCOLct + 1
ixnTS.Add ixnDIC.Item("type"), ixnCOLct
tgtCol = ixnCOLct
headerLine = headerLine & ixnDIC.Item("type") & " ct," & ixnDIC.Item("type") & " ms,"
ReDim Preserve IXNvalues(20000, 3 + (ixnCOLct – 3) * 2 + 1)
End If
‘update ixn array with values
tempVAL = 1: IXNvalues(rowID, colHead + (tgtCol – colHead) * 2 – 1) = _
IXNvalues(rowID, colHead + (tgtCol – colHead) * 2 – 1) + tempVAL
tempVAL = ixnDIC.Item("rowsIN"): IXNvalues(rowID, 1) = IXNvalues(rowID, 1) + tempVAL
tempVAL = ixnDIC.Item("rowsOUT"): IXNvalues(rowID, 2) = IXNvalues(rowID, 2) + tempVAL
tempVAL = ixnDIC.Item("buckets"): IXNvalues(rowID, 3) = IXNvalues(rowID, 3) + tempVAL
tempVAL = ixnDIC.Item("candidates"): IXNvalues(rowID, 4) = IXNvalues(rowID, 4) + tempVAL
tempVAL = ixnDIC.Item("matched"): IXNvalues(rowID, 5) = IXNvalues(rowID, 5) + tempVAL
tempVAL = ixnDIC.Item("MStot"): _
IXNvalues(rowID, colHead + (tgtCol – colHead) * 2 – 1 + 1) = _
IXNvalues(rowID, colHead + (tgtCol – colHead) * 2 – 1 + 1) + tempVAL
‘trap threadID
tempThread = IXNvalues(rowID, 6)
tempThreadID = ixnDIC.Item("thread") & ";"
If tempThread = "" Then IXNvalues(rowID, 6) = ixnDIC.Item("thread") & ";": GoTo skipThread
If InStr(1, tempThread, tempThreadID, 1) > 0 Then GoTo skipThread
tempThread = tempThread & ixnDIC.Item("thread") & ";"
IXNvalues(rowID, 6) = tempThread
skipThread:
tempThread = ""
‘construct clean line
””””””””””’
outputline = outputline & "," & ixnDIC.Item("host"): ixnDIC.Item("host") = ""
outputline = outputline & "," & ixnDIC.Item("thread"): ixnDIC.Item("thread") = ""
outputline = outputline & "," & ixnDIC.Item("ctxfree"): ixnDIC.Item("ctxfree") = ""
outputline = outputline & "," & ixnDIC.Item("type"): ixnDIC.Item("type") = ""
outputline = outputline & "," & ixnDIC.Item("user"): ixnDIC.Item("user") = ""
outputline = outputline & "," & ixnDIC.Item("buckets"): ixnDIC.Item("buckets") = ""
outputline = outputline & "," & ixnDIC.Item("candidates"): ixnDIC.Item("candidates") = ""
outputline = outputline & "," & ixnDIC.Item("matched"): ixnDIC.Item("matched") = ""
outputline = outputline & "," & ixnDIC.Item("MSselect"): ixnDIC.Item("MSselect") = ""
outputline = outputline & "," & ixnDIC.Item("MSmatch"): ixnDIC.Item("MSmatch") = ""
outputline = outputline & "," & ixnDIC.Item("MStot"): ixnDIC.Item("MStot") = ""
outputline = outputline & "," & ixnDIC.Item("rowsIN"): ixnDIC.Item("rowsIN") = ""
outputline = outputline & "," & ixnDIC.Item("rowsOUT"): ixnDIC.Item("rowsOUT") = ""
objCLEAN.WriteLine outputline
outputline = ""
skipIXN:
Loop
Next logLoop
objrawLOG.Close
‘print time series data
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objTS = objFSO.CreateTextFile(rawLOGpath & "\csvTS.txt", 1)
objTS.WriteLine headerLine
For i = 2 To logCT * 1440 + 1
tsDAY = tsStart + (i – 2) / 1440
tsLine = tsDAY & ","
For j = 1 To 3 + (ixnCOLct – 3) * 2
tsLine = tsLine & IXNvalues(i, j) & ","
Next j
objTS.WriteLine tsLine
tsLine = ""
Next i
End Sub
[/vb]
This is an evolution of my basic text parsing script. In this example I look for a multi-line transaction pattern typically from a single thread extract from a log file. This pattern forms a long running transaction that I need to track. This script looks for the pattern, and reconstructs to total transaction into a single line that can be used to generate complete transaction profiles and subsequent fenced latency analyses.
[vb]
Sub patternMLG()
‘set variables
”””””””
Dim rawLOG As String, rawLOGpath As String, splitArray() As String, splitTime() As String
‘Dim threadID As String, tsDAY As Date
‘Dim lineCt As Double, tsXLDAY As Double, tempVAL As Double
‘Dim IXNvalues() As Variant
‘ReDim Preserve IXNvalues(10000, 3)
pathFull = Sheets("input").Range("C13")
Set objrawLOG = CreateObject("Scripting.FileSystemObject")
rawLOGpath = objrawLOG.GetFile(pathFull).ParentFolder.path & "\"
Dim ixnDIC As New Dictionary
ixnDIC.Add "MPI_CtxDbxGetRecnoList: stmt =", ""
ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_reclist", ""
ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memcmpd", ""
ixnDIC.Add "MPI_MxmRunSearch:", ""
ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memhead", ""
ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_memexta", ""
ixnDIC.Add "MAD_PkgSelAcbByStmt: mpi_audhead", ""
ixnDIC.Add "MAD_DbxCommit: stmt=’commit’", ""
ixnDIC.Add "IXN=MEMSEARCH", ""
‘setup clean files
”””””””””
srcCSV = Dir(objrawLOG.GetFile(pathFull).ParentFolder.path & "\")
Do While srcCSV <> ""
tempFILE = rawLOGpath & "\" & srcCSV & ""
If srcCSV Like "csvTHREAD*" Then
Set delFILE = CreateObject("Scripting.FileSystemObject")
delFILE.Deletefile tempFILE, True
End If
srcCSV = Dir
Loop
fnameLOGclean = "csvTHREAD.txt"
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objCLEAN = objFSO.CreateTextFile(rawLOGpath & "\" & fnameLOGclean, 1)
objCLEAN.WriteLine "MPI_CtxDbxGetRecnoList: stmt,MAD_PkgSelAcbByStmt: mpi_reclist,MAD_PkgSelAcbByStmt: mpi_memcmpd,MPI_MxmRunSearch:,MAD_PkgSelAcbByStmt: mpi_memhead,MAD_PkgSelAcbByStmt: mpi_memexta,MAD_PkgSelAcbByStmt: mpi_audhead,MAD_DbxCommit: stmt=’commit’,IXN=MEMSEARCH,ms tot"
‘loop through mlg files
”””””””””””’
procRec = 0
rawLOG = "sampTHREAD.txt"
srcFullPath = rawLOGpath & rawLOG
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objrawLOG = objFSO.OpenTextFile(rawLOGpath & rawLOG, 1)
Do Until objrawLOG.AtEndOfStream
txnstring = objrawLOG.ReadLine
If txnstring Like "*MPI_CtxDbxGetRecnoList: stmt*" Then
ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = ""
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = ""
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = ""
ixnDIC.Item("MPI_MxmRunSearch:") = ""
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = ""
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = ""
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = ""
ixnDIC.Item("MAD_DbxCommit: stmt=’commit’") = ""
ixnDIC.Item("IXN=MEMSEARCH") = ""
ixnDIC.Item("ms tot") = ""
ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = Left(txnstring, 23)
End If
If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_reclist*" Then _
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = Left(txnstring, 23)
If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memcmpd*" Then _
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = Left(txnstring, 23)
If txnstring Like "*MPI_MxmRunSearch:*" Then _
ixnDIC.Item("MPI_MxmRunSearch:") = Left(txnstring, 23)
If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memhead*" Then _
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = Left(txnstring, 23)
If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_memexta*" Then _
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = Left(txnstring, 23)
If txnstring Like "*MAD_PkgSelAcbByStmt: mpi_audhead*" Then _
ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = Left(txnstring, 23)
If txnstring Like "*MAD_DbxCommit: stmt=’commit’*" Then _
ixnDIC.Item("MAD_DbxCommit: stmt=’commit’") = Left(txnstring, 23)
If txnstring Like "*IXN=MEMSEARCH*" Then
ixnDIC.Item("IXN=MEMSEARCH") = Left(txnstring, 23)
FindEQ5 = 0
For j = 1 To 5
FindEQ5 = InStr(FindEQ5 + 1, txnstring, "=")
If FindEQ5 = 0 Then Exit For
Next
ixnDIC.Item("ms tot") = Replace(Replace(Right(txnstring, Len(txnstring) – FindEQ5), " seconds.", ""), " ", "") * 1000
outputline = ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt"): ixnDIC.Item("MPI_CtxDbxGetRecnoList: stmt") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_reclist") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memcmpd") = ""
outputline = outputline & "," & ixnDIC.Item("MPI_MxmRunSearch:"): ixnDIC.Item("MPI_MxmRunSearch:") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memhead") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_memexta") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead"): ixnDIC.Item("MAD_PkgSelAcbByStmt: mpi_audhead") = ""
outputline = outputline & "," & ixnDIC.Item("MAD_DbxCommit: stmt=’commit’"): ixnDIC.Item("MAD_DbxCommit: stmt=’commit’") = ""
outputline = outputline & "," & ixnDIC.Item("IXN=MEMSEARCH"): ixnDIC.Item("IXN=MEMSEARCH") = ""
outputline = outputline & "," & ixnDIC.Item("ms tot"): ixnDIC.Item("ms tot") = ""
objCLEAN.WriteLine outputline
End If
Loop
objrawLOG.Close
End Sub
[/vb]
text