DataParser.psm1

<#
.SYNOPSIS
    This function parses every file in a specified directory. The data from each individual
    file is represented through a HashSet. The Hashtables containing data for each file are
    stored in an array and returned along with meta data.
.PARAMETER DirName
    Path to the directory with the data files.
.PARAMETER Tool
    Name of the tool that generated the data. (NTTTCP, etc.)
.PARAMETER Mode
    Whether the given directory contains 'Baseline' or 'Test' data
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Get-RawData {
    param (
        [Parameter(Mandatory=$true)] [String] $DirName, 
        [Parameter()] [String] $Tool,
        [Parameter()] [String] $Mode="Baseline",
        [Parameter()] [String] $InnerPivot,
        [Parameter()] [String] $OuterPivot
    )

    $output = @{}
    $files = Get-ChildItem -File $DirName
    if ($files.Count -eq 0) {
        Throw "'$DirName' does not contain any data files."
    }

    switch ($Tool) {
        "NTTTCP" {
            $parseFunc = ${Function:Parse-NTTTCP}

            $output."meta" = @{
                "props" = [Array] @(
                    "throughput",
                    "cycles/byte"
                )
                "units" = @{
                    "cycles/byte"     = "cycles/byte"
                    "throughput" = "Gbps"
                }
                "goal" = @{
                    "throughput" = "increase"
                    "cycles/byte"     = "decrease"
                }
                "format" = @{
                    "throughput" = "0.00"
                    "cycles/byte"     = "0.00"
                }
                "noTable"  = [Array] @("filename", "sessions", "bufferLen", "bufferCount")
            }
        }
        "LATTE" {
            $parseFunc = ${Function:Parse-LATTE}

            $output."meta" = @{
                "props" = [Array] @(
                    "latency"
                )
                "units" = @{
                    "latency"  = "us"
                }
                "goal" = @{
                    "latency"  = "decrease"
                }
                "format" = @{
                    "latency"  = "#.0"
                }
                "noTable"  = [Array]@("filename", "sendMethod", "protocol")
            }
        }
        "LagScope" {
            $parseFunc = ${Function:Parse-LagScope}

            $output."meta" = @{
                "props" = [Array] @(
                    "latency"
                )
                "units" = @{
                    "latency"  = "us"
                }
                "goal" = @{
                    "latency"  = "decrease"
                }
                "format" = @{
                    "latency"  = "#.0"
                }
                "noTable"  = [Array]@("filename", "sendMethod", "protocol")
            }
        }
        "CTStraffic" {
            $parseFunc = ${Function:Parse-CTSTraffic}

            $output."meta" = @{
                "props" = [Array] @(
                    "throughput"
                )
                "units" = @{
                    "throughput" = "Gbps"
                }
                "goal" = @{
                    "throughput" = "increase"
                }
                "format" = @{
                    "throughput" = "0.00"
                }
                "noTable"  = [Array]@("filename", "sessions")
            }
        }
        "CPS" {
            $parseFunc = ${Function:Parse-CPS}

            $output."meta" = @{
                "props" = [Array] @(
                    "conn/s",
                    "close/s"
                )
                "units" = @{
                    "conn/s" = ""
                    "close/s" = ""  
                }
                "goal" = @{
                    "conn/s" = "increase"
                    "close/s" = "increase"    
                }
                "format" = @{ 
                    "conn/s" = "0.0"
                    "close/s" = "0.0"
                }
                "noTable" = [Array]@("filename")
            }
        }
    } 

    $PathCosts = @{}
    $InnerPivotKeys = @{}
    $OuterPivotKeys = @{}

    $id = if ($Mode -eq "Baseline") {0} else {1}
    $output.data = [Array]@()  
    for($i = 0; $i -lt $files.Count; $i++) { 
        Write-Progress -Activity "Parsing $($Mode) Data Files..." -Status "Parsing..." -Id $id -PercentComplete (100 * (($i) / $files.Count))
        $output.data += , (& $parseFunc -FileName $files[$i].FullName -InnerPivot $InnerPivot -OuterPivot $OuterPivot `
                            -InnerPivotKeys $InnerPivotKeys  -OuterPivotKeys $OuterPivotKeys -PathCosts $PathCosts)
        if (-not $output.data[-1]) {
            $output.data = $output.data[0..($output.data.Count - 1)] 
        } 
    }

    if ($Tool -in @("CTStraffic", "NTTTCP")) {
        if ($PathCosts.Count -gt 0) {
            # This can be expanded to include the other metrics captured by the pathcosts tool
            Incorporate-PathCosts -Data $output.data -PathCosts $PathCosts   

            $output.meta.props += "total root VP utilization"
            $output.meta.goal["total root VP utilization"] = "decrease"
            $output.meta.format["total root VP utilization"] = "0.00"
            $output.meta.units["total root VP utilization"] = "% Utilization"

            $output.meta.props += "vSwitch root VP utilization"
            $output.meta.goal["vSwitch root VP utilization"] = "decrease"
            $output.meta.format["vSwitch root VP utilization"] = "0.00"
            $output.meta.units["vSwitch root VP utilization"] = "% Utilization"

            $output.meta.props += "cpu utlization"
            $output.meta.goal["cpu utlization"] = "decrease"
            $output.meta.format["cpu utlization"] = "0.00"
            $output.meta.units["cpu utlization"] = "% Utilization"

            $output.meta.props += "cycles/packet"
            $output.meta.goal["cycles/packet"] = "decrease"
            $output.meta.format["cycles/packet"] = "0.00"

            $output.meta.props += "cycles/byte"
            $output.meta.goal["cycles/byte"] = "decrease"
            $output.meta.format["cycles/byte"] = "0.00"
        }
        
    }

    Write-Progress -Activity "Parsing $($Mode) Data Files..." -Status "Done" -Id $id -PercentComplete 100
 
    if ($output."data".Count -eq 0) {
        Write-Error "Failed to parse any file in '$DirName'."
    }

    $output.meta.innerPivotKeys = $InnerPivotKeys.Keys 
    $output.meta.outerPivotKeys = $OuterPivotKeys.Keys 

    return $output
}

<#
.SYNOPSIS
    Reads data from HashTable containing pathcost data and writes the values to the DataEntry
    objects corresponding to each individual file.

.PARAMETER Data
    Array of DataEntry objects which each correspond to a single data file.
.PARAMETER PathCosts
    Hashtable containing a mapping between data filenames and pathcosts data
#>

function Incorporate-PathCosts ($Data, $PathCosts) {
    foreach ($entry in $Data) {
        $file = $entry.filename.Split("\")[-1]
        if ($PathCosts.ContainsKey($file)) {
            
            $cpb = $PathCosts[$file]["Byte path cost (cycles/byte)"]
            $cpu = $PathCosts[$file]["CPU Utilization"]
            $cpp = $PathCosts[$file]["Packet path cost (cycles/packet)"]
            $trvp = $PathCosts[$file]["Total Root VP Utilization"]
            $vsrvp = $PathCosts[$file]["vSwitch Root VP Utilization"]


            # TPUT measures from dedicated tools are more reliable than vswitch counters
            # (which sometimes return 0 erroneously), thus we perform the cycles/byte calculation
            # using our own TPUT measures when possible
            if ($PathCosts[$file].ContainsKey("Total CPU cycles used per second")) {
                $avgTput = ((1000 * 1000 * 1000) / 8) * ($entry["throughput"] | Measure-Object -Average).Average
                $cpb = $PathCosts[$file]["Total CPU cycles used per second"] / $avgTput
            }
                # Sometimes counters mess up and record nearly-zero for tput and it causes
                # cycle/byte calculations to return extremely large numbers. These outliers
                # make visualizations nearly un-readable, thus we filter outliers here. We
                # should look for a more sustainable solution in the future
            if ($cpb -lt 1000) {
                $entry["cycles/byte"] = $cpb 
            } 

            $entry["cycles/packet"] = $cpp
            $entry["cpu utlization"] = $cpu
            $entry["total root VP utilization"] = $trvp
            $entry["vSwitch root VP utilization"] = $vsrvp
            
        }
    }
}


<#
.SYNOPSIS
    Parses a single XML-formated NTTTCP output data file. Relevant data is collected and returned
    as a Hashtable.
.PARAMETER FileName
    Path of file to be parsed.
.PARAMETER InnerPivotKeys
    Set containing all inner pivot keys encountered across all data files
.PARAMETER OuterPivotKeys
    Set containing all outer pivot keys encountered across all data files
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Parse-NTTTCP ([String] $FileName, $InnerPivot, $OuterPivot, $InnerPivotKeys, $OuterPivotKeys, $PathCosts) {
    if ($Filename -match "pathcost") {
        Extract-PathCosts -Filename $Filename -PathCosts $PathCosts 
        return
    }
    
    if ($FileName -notlike "*.xml") {
        return
    }

    $file = (Get-Content $FileName) -as [XML]
    if (-not $file) {
        Write-Warning "Skipped '$FileName' because it is not valid XML."
        return
    }

    [Decimal] $cycles = $file.ChildNodes.cycles.'#text'
    [Decimal] $throughput = ($file.ChildNodes.throughput | where {$_.metric -eq "mbps"})."#text" / 1000
    [Int] $sessions = $file.ChildNodes.parameters.max_active_threads #should this be .num_processors or .parametes.max_active_threads?
    [Int] $bufferLen = $file.ChildNodes.bufferLen
    [Int] $bufferCount = $file.ChildNodes.io

    $dataEntry = @{
        "sessions"    = $sessions
        "throughput"  = $throughput
        "cycles/byte" = $cycles
        "filename"    = $FileName
        "bufferLen"   = $bufferLen
        "bufferCount" = $bufferCount
    }

    $iPivotKey = if ($dataEntry[$InnerPivot]) {$dataEntry[$InnerPivot]} else {""}
    $oPivotKey = if ($dataEntry[$OuterPivot]) {$dataEntry[$OuterPivot]} else {""}

    $InnerPivotKeys[$iPivotKey] = $true
    $OuterPivotKeys[$oPivotKey] = $true

    return $dataEntry
}

<#
.SYNOPSIS
    Parses a single pathcosts data file, extracts relevant data, and stores the data in a
    HashTable.
.PARAMETER Filename
    Path of file to be parsed
.PARAMETER PathCosts
    Hashtable to which pathcosts data is written
#>

function Extract-PathCosts ($Filename, $PathCosts) {
    (Get-Content -Path $Filename | ConvertFrom-Json).psobject.properties | Foreach {
        $key = $_.Name     
        $values = @{}
        $obj = $_.Value 
        if ($_.Value[1]) {
            $obj = $_.Value[1]
        }
        $obj.psobject.properties | Foreach {
            try {
                $values[$_.Name] = [Decimal]$_.Value 
            } catch {}
        }
        $PathCosts[$key] = $values
        
    }
}
 

<#
.SYNOPSIS
    This function parses a single CTStraffic status log file. Desired data is collected and returned
    as a Hashtable.
.PARAMETER Filename
    Path of the status log file to parse.
.PARAMETER InnerPivotKeys
    Set containing all inner pivot keys encountered across all data files
.PARAMETER OuterPivotKeys
    Set containing all outer pivot keys encountered across all data files
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Parse-CTStraffic ( [String] $Filename, $InnerPivot, $OuterPivot , $InnerPivotKeys, $OuterPivotKeys, $PathCosts) {

    if ($Filename -match "pathcost") {
        Extract-PathCosts -Filename $Filename -PathCosts $PathCosts 
        return
    }

    $data = (Get-Content $Filename) -replace '^"|"$','' | ConvertFrom-Csv



    if (-not ($data | Get-Member -Name "In-Flight" -ErrorAction "SilentlyContinue")) {
        Write-Warning "Skipped '$Filename' because it's not a valid ctsTraffic status log. Please verify that it was generated by the -StatusFilename option."
        return
    }

    $bytesToGigabits = [Decimal] 8 / (1000 * 1000 * 1000)

    $throughput = [Array]@()
    $warmupPadding = 2
    $dynamicWarmup = $true
    $cooldownPadding = 2

    for ($i = $warmupPadding; $i -lt $data.Count - $cooldownPadding; $i += 1) { 

        $tputVal = ($data[$i].SendBps, $data[$i].RecvBps | Measure-Object -Maximum).Maximum
        if ($tputVal -eq 0 -and $dynamicWarmup) {
            continue
        } else {
            $dynamicWarmup = $false
        }
        $throughput += [Decimal] $tputVal * $bytesToGigabits
    }  #($data.SendBps | measure -Average).Average * $bytesToGigabits

    $maxSessions = ($data."In-Flight" | measure -Max).Maximum

    $dataEntry = @{
        "sessions"   = $maxSessions
        "throughput" = $throughput
        "filename"   = $Filename
    }

    $iPivotKey = if ($dataEntry[$InnerPivot]) {$dataEntry[$InnerPivot]} else {""}
    $oPivotKey = if ($dataEntry[$OuterPivot]) {$dataEntry[$OuterPivot]} else {""}

    $InnerPivotKeys[$iPivotKey] = $true
    $OuterPivotKeys[$oPivotKey] = $true

    return $dataEntry
}

 
<#
.SYNOPSIS
    This function parses a single LATTE data file. Relevant data is collected and returned
    as a Hashtable.
.PARAMETER Filename
    Path of the status log file to parse.
.PARAMETER InnerPivotKeys
    Set containing all inner pivot keys encountered across all data files
.PARAMETER OuterPivotKeys
    Set containing all outer pivot keys encountered across all data files
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Parse-LATTE ([string] $FileName, $InnerPivot, $OuterPivot, $InnerPivotKeys, $OuterPivotKeys, $PathCosts) {

    if ($Filename -match "pathcost") {
        Extract-PathCost -Filename $Filename -PathCosts $PathCosts 
        return
    }

    $file = Get-Content $FileName

    $dataEntry = @{
        "filename" = $FileName
    }

    $splitline = Remove-EmptyStrings -Arr (([Array]$file)[0]).split(' ')
    if ($splitline[0] -eq "Protocol") {
        $histogram = $false

        foreach ($line in $file) {
            $splitLine = Remove-EmptyStrings -Arr $line.split(' ')

            if ($splitLine.Count -eq 0) {
                continue
            }

            if ($splitLine[0] -eq "Protocol") {
                $dataEntry.protocol = $splitLine[-1]
            }
            if ($splitLine[0] -eq "MsgSize") {
                $dataEntry.msgSize = $splitLine[-1]  # Not currently used for anything
            }

            if ($splitLine[0] -eq "Interval(usec)") {
                $dataEntry.latency = [HashTable] @{} 
                $histogram = $true
                continue
            }

            if ($histogram) {
                $dataEntry.latency[[Int32]$splitLine[0]] = [Int32] $splitLine[-1]
            }
        }

        if (-not $histogram) {
            Write-Warning "No histogram in file $filename"
            return
        }

    } 
    else {
        
        [Array] $latency = @()
        foreach ($line in $file) {
            if (-not ($line -match "\d+")) {
                Write-Warning "Error Parsing file $FileName"
                return
            }
            $latency += ,[int]$line
        }
        $dataEntry.latency = $latency
        $dataEntry.protocol = (($FileName.Split('\'))[-1].Split('.'))[0].ToUpper()
    }

    $dataEntry.sendMethod = (($FileName.Split('\'))[-1].Split('.'))[2]

    $iPivotKey = if ($dataEntry[$InnerPivot]) {$dataEntry[$InnerPivot]} else {""}
    $oPivotKey = if ($dataEntry[$OuterPivot]) {$dataEntry[$OuterPivot]} else {""}

    $InnerPivotKeys[$iPivotKey] = $true
    $OuterPivotKeys[$oPivotKey] = $true
    return $dataEntry
}

<#
.SYNOPSIS
    This function parses a single LagScope data file. Relevant data is collected and returned
    as a Hashtable.
.PARAMETER Filename
    Path of the data file to parse.
.PARAMETER InnerPivotKeys
    Set containing all inner pivot keys encountered across all data files
.PARAMETER OuterPivotKeys
    Set containing all outer pivot keys encountered across all data files
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Parse-LagScope ([string] $FileName, $InnerPivot, $OuterPivot, $InnerPivotKeys, $OuterPivotKeys, $PathCosts) {

    if ($Filename -match "pathcost") {
        Extract-PathCost -Filename $Filename -PathCosts $PathCosts 
        return
    }

    $file = Get-Content $FileName

    $rawDataEntry = @{
        "filename" = $FileName
    }
    $histDataEntry = @{
        "filename" = $Filename
    }

    [Array] $latency = @()

    $hasHistogram = $false
    $histogram = @{}
    foreach ($line in $file) {
        $splitLine = Remove-EmptyStrings $line.Split(" ")

        if ($splitLine.Count -eq 0) {continue}

        if ($line.Trim() -eq "Interval(usec) Frequency") {
            $hasHistogram = $true 
            continue
        }

        if ($hasHistogram) {
            $histogram[[Int]$splitLine[0]] = [Int]$splitLine[1]
            continue
        }

        if ($splitLine[0] -Like "protocol*") {
            $rawDataEntry.protocol = $splitline[-1]
            $histDataEntry.protocol = $splitline[-1]
            continue
        } 
        
        if ($splitLine[-1] -Like "time=*") {
            $latstr = $splitLine[-1]
            $labelLen = "time=".Length
            $unitLen = "us".Length 
            $latency += ,[int] $latstr.Substring($labelLen, $latstr.Length - ($labelLen + $unitLen))
        } 
    }
    $rawDataEntry.latency = $latency 
    $histDataEntry.latency = $histogram

    $iPivotKey = if ($rawDataEntry[$InnerPivot]) {$rawDataEntry[$InnerPivot]} else {""}
    $oPivotKey = if ($rawDataEntry[$OuterPivot]) {$rawDataEntry[$OuterPivot]} else {""}

    $InnerPivotKeys[$iPivotKey] = $true
    $OuterPivotKeys[$oPivotKey] = $true

    $output = @($rawDataEntry)
    if ($histogram.Count -gt 0) {
        $output = @($rawDataEntry, $histDataEntry)
    }

    return $output
}
 

<#
.SYNOPSIS
    This function parses a single file containing CPS data. Each line contains
    conn/s and close/s samples which are extracted into arrays, packaged into a
    HashTable, and returned.
.PARAMETER Filename
    Path of the status log file to parse.
.PARAMETER InnerPivotKeys
    Set containing all inner pivot keys encountered across all data files
.PARAMETER OuterPivotKeys
    Set containing all outer pivot keys encountered across all data files
.PARAMETER InnerPivot
    Name of inner pivot property
.PARAMETER OuterPivot
    Name of outer pivot property
#>

function Parse-CPS ([string] $FileName, $InnerPivot, $OuterPivot, $InnerPivotKeys, $OuterPivotKeys, $PathCosts) {
    if ($Filename -match "pathcost") {
        Extract-PathCost -Filename $Filename -PathCosts $PathCosts 
        return
    }

    $file = Get-Content $FileName

    $dataEntry = @{
        "filename" = $FileName
        "conn/s" = [Array] @()
        "close/s" = [Array] @()
    }

    foreach ($line in $file[1..($file.Count - 1)]) {
        $splitLine = Remove-EmptyStrings -Arr $line.split(' ')
         
        if ($splitLine.Count -eq 0) {
            break
        }

        $dataEntry."conn/s" += ,[Decimal]($splitLine[5])
        $dataEntry."close/s" += ,[Decimal]($splitLine[6]) 
    } 

    $iPivotKey = if ($dataEntry[$InnerPivot]) {$dataEntry[$InnerPivot]} else {""}
    $oPivotKey = if ($dataEntry[$OuterPivot]) {$dataEntry[$OuterPivot]} else {""}

    $InnerPivotKeys[$iPivotKey] = $true
    $OuterPivotKeys[$oPivotKey] = $true
    
    return $dataEntry
}


<#
.SYNOPSIS
    This function removes all empty strings from the given array
.PARAMETER Arr
    Array of strings
#>

function Remove-EmptyStrings ($Arr) {
    $newArr = [Array] @()
    foreach ($val in $arr) {
        $trimVal = $val.Trim()
        if ($trimVal -ne "") {
            $newArr += $trimVal
        }
    }
    return $newArr
}