lib/TMD.DataManipulation.ps1

## Data Reduction Functions
Function Get-NameAndId {
    param(
        [Parameter(Mandatory = $false, ValueFromPipeline = $true)][AllowNull()]$Data = $null,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][string]$IDName = 'id',
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][string]$IDValueFrom = 'Id',
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)]$NameName = 'name',
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)]$NameValueFrom = 'Name'

    )

    Begin { }

    Process {

        ## Get the Name and ID of each of the objects.
        ## Using the defaults you'll have the JSON equivilent to { id: $_.Id; name: $_.Name }
        if ($Data) {
            ## Ensure that our data is wrapped with an array
            if ($Data.GetType() -eq 'System.Array') {
                foreach ($Item in $Data) {
                    [System.Collections.Hashtable]@{
                        $IDName   = $Item.$IDValueFrom
                        $NameName = $Item.$NameValueFrom
                    }
                }
            } else {
                [System.Collections.Hashtable]@{
                    $IDName   = $Data.$IDValueFrom
                    $NameName = $Data.$NameValueFrom
                }
            }
        }
    }

    End { }
}
Function ConvertTo-PlainObject {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)][AllowNull()]
        $InputObject
    )

    return [System.Management.Automation.PSSerializer]::Deserialize([System.Management.Automation.PSSerializer]::Serialize($InputObject))

}
Function Optimize-DataObject {
    param(
        [Parameter(Mandatory = $false, ValueFromPipeline = $true)][AllowNull()]$Data = $null,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][scriptblock]$ProcessingMap,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][String]$StartLabel,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][String]$EndLabel,
        [Parameter(Mandatory = $false, ValueFromPipeline = $false)][int]$Level = 0
    )

    Begin {
        ## Print Start Label, Accomodate Level
        if ($StartLabel) {
            for ($i = 0; $i -lt $Level; $i++) { Write-Host ' ' -NoNewline }
            Write-Host "$StartLabel"
        }
    }

    Process {
        if ($Data) {
            foreach ($Global:Item in $Data) {
                Invoke-Command -ScriptBlock $ProcessingMap -NoNewScope -ErrorAction Continue
            }
        }

    }
    End {
        ## Print Start Label, Accomodate Level
        if ($EndLabel) {
            Write-Host "$EndLabel"
        }
    }
}

Function Test-VariableExists {
    param(
        [Parameter(Mandatory = $true)][String]$Name,
        [Parameter(Mandatory = $false)][Switch]$PassThru

    )
    if (Get-Variable -Name $Name -ErrorAction SilentlyContinue) {
        if ($PassThru) {
            $var = Get-Variable $Name
            return $var
        } else {
            return $true
        }
        return $true
    } else {
        return $false
    }
}

Function New-VariableName ([String]$string) {

    if ($string.Length -gt 0) {

        $delimeters = @(' ', '.', ',', '_', '-', '(', ')', '\', '/', '?', ':', '?')
        $string = Replace-VariableCharWithCamelCase -string $string -delimeter $delimeters

        $string = $string.trim()
        $string = $string.replace('#', 'Num')

        $string = Lowercase-FirstCharacter -string $string
        $string += 'Var'

        $string
    } else {
        $string
    }
}

Function ConvertTo-LowercaseFirstCharacter([String]$string) {
    $string.Substring(0, 1).ToLower() + $string.Substring(1, $string.Length - 1)
}
Function ConvertTo-UppercaseFirstCharacter([String]$string) {
    $string.Substring(0, 1).ToUpper() + $string.Substring(1, $string.Length - 1)
}

Function ConvertTo-VariableCharWithCamelCase(
    [String]$string,
    [String[]]$delimeter
) {

    foreach ($delim in $delimeter) {
        if ($string.contains($delim)) {
            $stringArr = $string.Split($delim)
            $tempString = ''
            for ($i = 0; $i -lt $stringArr.Count; $i++) {
                $word = $stringArr[$i].trim()
                if ($word.length -gt 0) {
                    $tempString += Uppercase-FirstCharacter -string $word
                }
            }
            $string = $tempString
        }
    }
    $string
}

Function ConvertTo-Array {
    param(
        [Parameter(Mandatory = $true, ValueFromPipeline = $True)][AllowNull()]$InputObject
    )

    ## Initalize the Return Array
    $ResultArray = @()

    ## Return an empty array on Null objects
    if (-Not $InputObject) {
        return $ResultArray
    }

    ## Switch based on the Input Object type
    $ObjectType = $InputObject.GetType().BaseType
    switch ($ObjectType) {
        'string' {

            ## A string may be an array object, as a Json string, for example
            $PossibleArray = $InputObject #| ConvertFrom-Json
            if (($PossibleArray.GetType()).BaseType -eq 'System.Array') {
                $ResultArray = @($InputObject)
                break
            }

            ## Replace end characters and quotes
            $InputObject = $InputObject.Replace('[', '')
            $InputObject = $InputObject.Replace(']', '')
            $InputObject = $InputObject.Replace("'", '')
            $InputObject = $InputObject.Replace('"', '')

            ## Split on a comma if there is one
            if ($InputObject.contains(',')) {
                $ResultArray = $InputObject -Split ',', ' ' | ForEach-Object { $_.Trim() }
            } else {
                if ($InputObject -ne '') {
                    $ResultArray = @($InputObject)
                }
            }
            break

        }
        'System.Object[]' {
            if ($ObjectType.BaseType.FullName -eq 'System.Array') {
                $ResultArray = $InputObject
                break
            }
            break
        }
        'System.Array' {
            return $InputObject
        }
        ## No Array type objects were matched, return whatever was passed in as the only object in an array
        Default {
            $ResultArray = @($InputObject)
            break
        }
    }
    ## The comma below is deliberate. PowerShell will (oddly) strip an array down to it's most basic form (a null, a string).
    ## It's related to pipeline optimization Reasons. See https://www.reddit.com/r/PowerShell/comments/6yogs2/functions_that_return_arrays/
    ## using the Comma ultimately 'returns 2 objects back', of which the first (an empty object before the comma) is discarded, and the array
    ## then passed through, even if it is @(null) or @('One String')
    return , $ResultArray
}


Function ConvertTo-Boolean {
    param(
        [AllowNull()]$InputObject
    )
    if ($null -ne $InputObject) {

        switch ($InputObject.GetType().ToString()) {
            'System.String' {
                $trueStrings = @('yes', 'y', 'true', 't', '1')
                $falseStrings = @('no', 'n', 'false', 'f', '0')

                if ($trueStrings.Contains($InputObject.ToLower())) {
                    return $true
                }
                if ($falseStrings.Contains($InputObject.ToLower())) {
                    return $false
                }
                return $false
            }
            'System.Boolean' {
                if ($InputObject) {
                    return $true
                } else {
                    return $false
                }
                return $false
            }
            'System.Int32' {
                if ($InputObject -gt 0) {
                    return $true
                } else {
                    return $false
                }
                return $false
            }

            Default {
                try {
                    $asString = [System.Convert]::ToString($InputObject)
                    $stringLikeTrue = [System.Convert]::ToBoolean($asString)
                    if ($stringLikeTrue) {
                        return $true
                    } else {
                        return $false
                    }
                } catch {
                    return $false
                }
            }
        }
    }
}
function Convert-Size {
    [cmdletbinding()]
    param(
        [validateset('Bytes', 'KB', 'MB', 'GB', 'TB')]
        [string]$From,
        [validateset('Bytes', 'KB', 'MB', 'GB', 'TB')]
        [string]$To,
        [Parameter(Mandatory = $true)]
        [double]$Value,
        [int]$Precision = 4
    )
    switch ($From) {
        'Bytes' { $value = $Value }
        'KB' { $value = $Value * 1024 }
        'MB' { $value = $Value * 1024 * 1024 }
        'GB' { $value = $Value * 1024 * 1024 * 1024 }
        'TB' { $value = $Value * 1024 * 1024 * 1024 * 1024 }
    }

    switch ($To) {
        'Bytes' { return $value }
        'KB' { $Value = $Value / 1KB }
        'MB' { $Value = $Value / 1MB }
        'GB' { $Value = $Value / 1GB }
        'TB' { $Value = $Value / 1TB }

    }

    return [Math]::Round($value, $Precision, [MidPointRounding]::AwayFromZero)

}
## Method to assess an object and return the key at the provided node (Obj.Prop.SubProp.Value)
function Get-Value($object, $key) {
    $p1, $p2 = $key.Split('.')
    if ($p2) { return Get-Value -object $object.$p1 -key $p2 }
    else { return $object.$p1 }
}
## Method for setting values in an object node.address.format like Get-Value above
function Set-Value($object, $key, $Value) {

    ## Deal with appropriate typing
    if ($Value -match '^\d+$') { $Value = [Int64]$Value }
    if ($Value -eq 'true') { $Value = $True }
    if ($Value -eq 'false') { $Value = $False }

    ## Split the Key into comparable parts
    # $p1, $p2 = $key.Split(".") | Select-Object -First 2
    $p1, $p2 = $key.Split('.')

    ## Is the node an array reference
    if ($p1 -like '*]') {

        ## This node is an array object, separte the array node and the array pointer
        $p1Node = $p1 | Select-String -Pattern '.*[a-zA-Z]' | ForEach-Object { $_.Matches[0].Value }
        $p1Pointer = $p1 | Select-String -Pattern '[0-9]' | ForEach-Object { $_.Matches[0].Value }
        if ($p2) { Set-Value -object $object.$p1Node[$p1Pointer] -key $p2 -Value $Value }
        else { $object.$p1 = $Value }
    }

    else {
        ## This object is a flat node object, not an array.
        # Write-Host "Node is an object"
        if ($p2) { Set-Value -object $object.$p1 -key $p2 -Value $Value }
        else { $object.$p1 = $Value }
    }
}

## Method for getting a TimeSpan that is Human Readable format
function Get-TimeSpanString {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $false, ParameterSetName = 'DateTime')]
        [datetime]
        $Start = (Get-Date),

        [Parameter(Mandatory = $True, ParameterSetName = 'DateTime', Position = 0 )]
        [datetime]
        $End,

        [Parameter(Mandatory = $True, ParameterSetName = 'Timespan', Position = 0)]
        [timespan]
        $Timespan,

        [Parameter(Mandatory = $False)]
        [int]$TimespanUnits = 3
    )

    process {

        $SecondsReference = @{
            Year   = (60 * 60 * 24 * 365)
            Month  = (60 * 60 * 24 * 30)
            Day    = (60 * 60 * 24)
            Hour   = (60 * 60)
            Minute = 60
        }

        ## Calculate the Timespan
        if ($PSCmdlet.ParameterSetName -eq 'DateTime') {
            $Timespan = (New-TimeSpan -Start $Start -End $End)
        }

        ## Get the Total number of seconds of the timspand (abs, to force a positive number)
        $TimespanSeconds = [math]::Abs($Timespan.TotalSeconds)

        ## Create a Span String to collect the required parts to
        $SpanStringParts = [System.Collections.ArrayList]@()

        ## Record how many years
        $Years = [math]::Floor($TimespanSeconds / $SecondsReference.Year)
        if ($Years -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$($Years)y"))
            $TimespanSeconds -= ($Years * $SecondsReference.Year)
        }

        ## Record how many Months
        $Months = [math]::Floor($TimespanSeconds / $SecondsReference.Month)
        if ($Months -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$($Months)Mo"))
            $TimespanSeconds -= ($Months * $SecondsReference.Month)
        }

        ## Record how many Days
        $Days = [math]::Floor($TimespanSeconds / $SecondsReference.Day)
        if ($Days -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$($Days)d"))
            $TimespanSeconds -= ($Days * $SecondsReference.Day)
        }

        ## Record how many Hours
        $Hours = [math]::Floor($TimespanSeconds / $SecondsReference.Hour)
        if ($Hours -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$($Hours)h"))
            $TimespanSeconds -= ($Hours * $SecondsReference.Hour)
        }

        ## Record how many Minutes
        $Minutes = [math]::Floor($TimespanSeconds / $SecondsReference.Minute)
        if ($Minutes -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$($Minutes)m"))
            $TimespanSeconds -= ($Minutes * $SecondsReference.Minute)
        }

        ## Record how many Seconds
        if ($TimespanSeconds -and ($SpanStringParts.Count -lt $TimespanUnits)) {
            [void]($SpanStringParts.Add("$([math]::Floor($TimespanSeconds))s"))
        }

        ## Record how many Seconds
        if ($SpanStringParts.Count -lt $TimespanUnits) {
            [void]($SpanStringParts.Add("$($Timespan.Milliseconds)ms"))
        }

        ## Combine and return the Span String
        $SpanStringParts -join ' '
    }
}

function Split-CsvFile {
    <#
    .SYNOPSIS
    Split a CSV file into small batches
 
    .DESCRIPTION
    This function consumes a CSV file and
    splits it into batches, using the defined splitting
    parameters
 
    .PARAMETER Path
    Path to the CSV File to split
 
    .PARAMETER OutputFolder
    Folder path to save the split files to.
    The folder (path) will be created if it does not
    already exist
 
    .PARAMETER BatchSize
    The number of lines of CSV data to add to
    each of the output files
 
    .EXAMPLE
       Split-CsvFile -Path 'Big.csv' -BatchSize 5000
 
    .EXAMPLE
    Split-CsvFile -Path 'Big.csv' -BatchSize 20000 -OutFolderPath './Split'
 
    .EXAMPLE
       Split-CsvFile -Path @('Medium.csv', 'Big.csv') -BatchSize 5000
 
    .OUTPUTS
    None
    #>


    [CmdletBinding()]      # Always add CmdletBinding to expose the common Cmdlet variables
    [OutputType([Bool])]   # Add this if the function has an output
    param(
        [Parameter(Mandatory = $true,
            Position = 0,
            ValueFromPipeline = $true, # Make sure you have a process block if the function accepts value(s) from the pipeline
            ValueFromPipelineByPropertyName = $true)]
        [String[]]$Path,

        [Parameter(Mandatory = $false,
            Position = 1,
            ValueFromPipeline = $true, # Make sure you have a process block if the function accepts value(s) from the pipeline
            ValueFromPipelineByPropertyName = $true)]
        [String]$OutFolderPath,

        [Parameter(Mandatory = $False,
            Position = 2,
            ValueFromPipeline = $true, # Make sure you have a process block if the function accepts value(s) from the pipeline
            ValueFromPipelineByPropertyName = $true)]
        [Int32]$BatchSize = 25000
    )

    process {

        if (-Not $OutputFolder) {
            $OutputFolder = ($Path[0] | Split-Path -Parent)
        }

        ## Create the OutputFolder
        Test-FolderPath $OutputFolder

        # Iterate over the Files provided
        foreach ($File in $Path) {

            ## Read the Header Row
            if (-Not (Test-Path $File)) {
                throw "File: $File does not exist"
            }

            ## Begin a FileStream Reader
            $BaseFileName = Split-Path -LeafBase -Path $File
            $OriginalFileReader = New-Object System.IO.StreamReader $File
            $HeaderRow = $OriginalFileReader.ReadLine()

            ## Start our Counters and Read the New File
            $BatchNumber = 1
            $BatchCsvLines = [System.Collections.ArrayList]@()
            while ($null -ne ($Line = $OriginalFileReader.ReadLine())) {

                ## Add this line
                [void]($BatchCsvLines.Add($Line))

                ## Carve off a job to analyze these rows
                if ($BatchCsvLines.Count -eq $BatchSize) {

                    # Write the batch to the file
                    $OutFilePath = Join-Path $OutputFolder ("$($BaseFileName)_part_$($BatchNumber).csv")
                    Set-Content -Path $OutFilePath -Force -Encoding utf8 -Value $HeaderRow
                    Add-Content -Path $OutFilePath -Encoding utf8 -Force -Value $BatchCsvLines

                    ## Empty the batch array and increment our Batch Number
                    $BatchCSVLines.Clear()
                    $BatchNumber++
                }
            }

            # Close the File Reader
            $OriginalFileReader.Close()

            ## Write the remainder of the
            $OutFilePath = Join-Path $OutputFolder ("$($BaseFileName)_part_$($BatchNumber).csv")
            Set-Content -Path $OutFilePath -Force -Encoding utf8 -Value $HeaderRow
            Add-Content -Path $OutFilePath -Encoding utf8 -Force -Value $BatchCsvLines
            Remove-Variable OriginalFileReader

            Move-Item $File -Destination ("$File.split") -Force
        }
    }
}