# Sanitization.psm1

# A single redaction rule: a regex pattern plus the recipe used to produce the replacement text.
# The replacement comes either from a format string or from a script block, as recorded in Type.
class RedactionRule {
    # Regex pattern selecting the text to redact.
    [ValidateNotNullOrEmpty()][string]$Pattern
    # Script block invoked with the seed when Type is 'Function'.
    [scriptblock]$NewValueFunction
    # Format string combined with the seed through the -f operator when Type is 'String'.
    [string]$NewValueString
    # Selects which of the two replacement sources Evaluate() uses.
    [ValidateSet('String','Function')][string]$Type

    # Builds a rule whose replacement is a format string.
    RedactionRule ([string]$Pattern, [string]$NewValueString) {
        $this.Type             = 'String'
        $this.Pattern          = $Pattern
        $this.NewValueFunction = $null
        $this.NewValueString   = $NewValueString
    }

    # Builds a rule whose replacement is produced by a script block.
    RedactionRule ([string]$Pattern, [scriptblock]$NewValueFunction) {
        $this.Type             = 'Function'
        $this.Pattern          = $Pattern
        $this.NewValueString   = $null
        $this.NewValueFunction = $NewValueFunction
    }

    # Produces the replacement text for the given seed.
    # 'Function' rules invoke the script block with the seed as its only argument;
    # 'String' rules format the seed into NewValueString.
    [string] Evaluate([int]$Seed){
        if ($this.Type -ne 'String') {
            # $this.Type -eq 'Function'
            return (& $this.NewValueFunction $Seed)
        }

        return ($this.NewValueString -f $Seed)
    }
}
# Maps a non-negative integer seed (first positional argument, read from $args[0]) to a
# dotted-quad string "o1.o2.o3.o4" by decomposing the seed in base 254:
#   o4 = digit0 + 1   (1..254)
#   o3 = digit1       (0..253)
#   o2 = digit2       (0..253)
#   o1 = digit3 + 11  (starts at 11; at most 142 for seeds up to [int]::MaxValue)
# Distinct seeds therefore yield distinct addresses.
function Convert-IPValue {
    [int]$t = $args[0]

    # Assigning a fractional quotient to an [int]-constrained variable rounds half-to-even,
    # which made different seeds collide (e.g. 381 and 635 both produced 11.0.2.128).
    # Floor the quotient explicitly so every step is a true integer division.
    $o4 = ($t % 254) + 1
    $t = [math]::Floor([double]$t / 254)
    $o3 = $t % 254
    $t = [math]::Floor([double]$t / 254)
    $o2 = $t % 254
    $t = [math]::Floor([double]$t / 254)
    $o1 = $t % 254 + 11

    "$o1.$o2.$o3.$o4"
}
<#
.SYNOPSIS
Redact sensitive information from a file
 
.DESCRIPTION
Redact sensitive information from a file as an array of strings or one long string by defined redaction rules
 
.PARAMETER RedactionRule
Array of rules to redact by
 
.PARAMETER Path
Specifies a path to one or more locations. Wildcards are permitted.
 
.PARAMETER LiteralPath
Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
characters as escape sequences.
 
.PARAMETER ReadRaw
Ignores newline characters and passes the entire contents of a file as one string with the newlines preserved.
By default, newline characters in a file are used as delimiters to separate the input into an array of strings.
With this switch the file is processed as one string instead of line by line.
 
.EXAMPLE
$WULog = "$env:USERPROFILE\Desktop\WULog.log"
Get-WindowsUpdateLog -LogPath $WULog
Invoke-FileRedaction -Path $WULog -ReadRaw -RedactionRule @(
    New-RedactionRule '(?<=\d{4}\/\d{2}\/\d{2} \d{2}\:\d{2}\:\d{2}\.\d{7} \d{1,5} \d{1,5}\s+)\w+(?=\s+)' 'Component_{0}'
)
 
.NOTES
Invoke-FileRedaction creates 2 files in the same location as the input file:
the redacted file with the "-Sanitized.txt" suffix
and the conversion table CSV file with the "-ConvertionTable.csv" suffix.
By default all strings in the files are processed by Invoke-Redaction with the -Consistent parameter.
#>

function Invoke-FileRedaction {
    [CmdletBinding()]
    param (
        # Rules applied, in order, to every string read from the file(s).
        [Parameter(Mandatory = $true, 
            Position = 0)]
        [RedactionRule[]]$RedactionRule,
        # Specifies a path to one or more locations. Wildcards are permitted.
        [Parameter(Mandatory=$true,
                   Position=1,
                   ParameterSetName="Path",
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true,
                   HelpMessage="Path to one or more locations.")]
        [ValidateNotNullOrEmpty()]
        [SupportsWildcards()]
        [string[]]
        $Path,
        # Specifies a path to one or more locations. Unlike the Path parameter, the value of the LiteralPath parameter is
        # used exactly as it is typed. No characters are interpreted as wildcards. If the path includes escape characters,
        # enclose it in single quotation marks. Single quotation marks tell Windows PowerShell not to interpret any
        # characters as escape sequences.
        [Parameter(Mandatory=$true,
                   Position=1,
                   ParameterSetName="LiteralPath",
                   ValueFromPipelineByPropertyName=$true,
                   HelpMessage="Literal path to one or more locations.")]
        [Alias("PSPath")]
        [ValidateNotNullOrEmpty()]
        [string[]]
        $LiteralPath,
        # Read each file as one string instead of line by line.
        [switch]$ReadRaw
    )

    begin {
        # Extra Export-Csv arguments, splatted below: -NoTypeInformation is added on PowerShell 5 and older.
        $ExportCSVProperties = @{}
        if($PSVersionTable.PSVersion.Major -le 5){
            $ExportCSVProperties['NoTypeInformation'] = $true
        } 
    }

    process {
        # Collect the fully resolved provider paths of every existing input item.
        $paths = @()
        if ($psCmdlet.ParameterSetName -eq 'Path') {
            foreach ($aPath in $Path) {
                if (!(Test-Path -Path $aPath)) {
                    # Report a missing item as a non-terminating error and move on to the next path.
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }
            
                # Resolve any wildcards that might be in the path
                $provider = $null
                $paths += $psCmdlet.SessionState.Path.GetResolvedProviderPathFromPSPath($aPath, [ref]$provider)
            }
        }
        else {
            foreach ($aPath in $LiteralPath) {
                if (!(Test-Path -LiteralPath $aPath)) {
                    # Report a missing item as a non-terminating error and move on to the next path.
                    $ex = New-Object System.Management.Automation.ItemNotFoundException "Cannot find path '$aPath' because it does not exist."
                    $category = [System.Management.Automation.ErrorCategory]::ObjectNotFound
                    $errRecord = New-Object System.Management.Automation.ErrorRecord $ex,'PathNotFound',$category,$aPath
                    $psCmdlet.WriteError($errRecord)
                    continue
                }
            
                # Resolve any relative paths
                $paths += $psCmdlet.SessionState.Path.GetUnresolvedProviderPathFromPSPath($aPath)
            }
        }
        
        foreach ($aPath in $paths) {        
            # Output will be on the same directory
            $SanitizedFilePath = $aPath + "-Sanitized.txt"
            'Sanitized File: {0}' -f $SanitizedFilePath | Write-Verbose
            $ConvertionTableFilePath = $aPath + "-ConvertionTable.csv"
            'Convertion Table File: {0}' -f $ConvertionTableFilePath | Write-Verbose 
            
            # $aPath is already fully resolved, so it must be consumed literally: going through
            # -Path / -FilePath would re-interpret wildcard characters such as '[' and ']' in file names.
            $TotalLines = Get-Content -LiteralPath $aPath | Measure-Object -Line | Select-Object -ExpandProperty Lines
            'Total No.Lines: {0}' -f $TotalLines | Write-Verbose
            if ($TotalLines -eq 0) {
                # Invoke-Redaction validates -TotalLines to be at least 1.
                $TotalLines = 1
            }

            # With -ReadRaw the whole file travels down the pipeline as a single string,
            # so the progress bar of Invoke-Redaction has exactly one item to count.
            $ProgressTotalLines = $TotalLines
            if ($ReadRaw) {
                $ProgressTotalLines = 1
            }
            
            Write-Progress -Activity "Redacting sensitive data from file: `"$aPath`"" -Id 1
            
            # Invoke-Redaction publishes its conversion table under the variable name given to
            # -OutConvertionTable; it is read back as $ConvertionTable on the next line.
            Get-Content -LiteralPath $aPath -Raw:$ReadRaw | Invoke-Redaction -RedactionRule $RedactionRule -Consistent -OutConvertionTable 'ConvertionTable' -TotalLines $ProgressTotalLines | Out-File -LiteralPath $SanitizedFilePath
            $ConvertionTable.Keys | Select-Object -Property @{N = 'NewValue'; E = {$ConvertionTable[$_]}}, @{N = 'Original'; E = {$_}} | Sort-Object -Property NewValue | Export-Csv -LiteralPath $ConvertionTableFilePath @ExportCSVProperties

            # Emit one summary object per processed file.
            [PSCustomObject]@{
                Original        = $aPath
                Sanitized       = $SanitizedFilePath
                ConvertionTable = $ConvertionTableFilePath            
            }       
        }
    }
    
    end {
        # Close the parent progress record; $aPath still holds the last path handled in process.
        Write-Progress -Activity "[Done] Redacting sensitive data from file: `"$aPath`" [Done]" -Id 1 -Completed
    }
}
<#
.SYNOPSIS
Redact sensitive information from an object
 
.DESCRIPTION
Redact sensitive information from an object as string by defined redaction rules
 
.PARAMETER RedactionRule
Array of redaction rules to redact by
 
.PARAMETER InputObject
String to redact sensitive information from
 
.PARAMETER Consistent
Saves discovered values in a ConvertionTable (hash table); when the same value is discovered again it is replaced with the same string that was generated the first time from the redaction rule's NewValue function or NewValue format string.
It uses a uniqueness value to generate the new value from the redaction rule (if applicable).
If Consistent is omitted, the new value is generated from the redaction rule's NewValue based on the current line number.
 
.PARAMETER OutConvertionTable
Creates a variable with the specified name and the ConvertionTable as its value.
 
.PARAMETER AsObject
Return an object with the old string, the processed string, line number and if the string was changed or not instead of just a processed string.
 
.PARAMETER TotalLines
Number of lines that are going to be processed over the pipeline.
Relevant for showing informative progress bar.
 
.EXAMPLE
Replace all a-z letters with '+' sign
$RedactionRule = New-RedactionRule -Pattern '[a-z]' -NewValueString '+'
ipconfig /all | Invoke-Redaction -RedactionRule $RedactionRule
 
.EXAMPLE
Replace all service names that start with the letter 's' with 's_{0}', where {0} is replaced by the uniqueness factor.
Each unique service name will be replaced with a unique new value 's_{0}' and it will stay consistent if the service shows up multiple times.
$RedactionRule = New-RedactionRule -Pattern '(?<=\s)[Ss].+' -NewValueString 's_{0}'
Get-Process | Out-String | Invoke-Redaction -RedactionRule $RedactionRule -Consistent
 
.NOTES
 
#>

function Invoke-Redaction {
    [Alias('Invoke-Sanitization', 'irdac', 'isntz')]
    [CmdletBinding()]
    param(
        # Rules applied, in order, to every incoming string.
        [Parameter(Mandatory = $true, 
            Position = 0)]
        [RedactionRule[]]$RedactionRule,
        # One line string
        [Parameter(Mandatory = $true,  
            ValueFromPipeline = $true,
            Position = 1)]
        [AllowEmptyString()] # Incoming lines can be empty, so applied because of the Mandatory flag
        [psobject]
        $InputObject,
        # Requires $ConvertionTable but if it won't be provided, empty hash table for $ConvertionTable will be initialized instead
        [switch]
        $Consistent,
        # Emit a detail object per input instead of just the redacted string.
        [switch]
        $AsObject,
        # Expected number of pipeline items; only used for the progress bar.
        [ValidateRange(1, [int]::MaxValue)]
        [int]
        $TotalLines = 1
    )

    DynamicParam {
        # -OutConvertionTable only exists when -Consistent is specified.
        if ($Consistent) {
            $ParameterName = 'OutConvertionTable'
            $RuntimeParameterDictionary = New-Object System.Management.Automation.RuntimeDefinedParameterDictionary
            $AttributeCollection = New-Object System.Collections.ObjectModel.Collection[System.Attribute]
            
            $ValidateNotNullOrEmptyAttribute = New-Object System.Management.Automation.ValidateNotNullOrEmptyAttribute
            $AttributeCollection.Add($ValidateNotNullOrEmptyAttribute)
            
            $ParameterAttribute = New-Object System.Management.Automation.ParameterAttribute
            $AttributeCollection.Add($ParameterAttribute)
            
            $RuntimeParameter = New-Object System.Management.Automation.RuntimeDefinedParameter($ParameterName, [string], $AttributeCollection)
            $RuntimeParameterDictionary.Add($ParameterName, $RuntimeParameter)
            
            return $RuntimeParameterDictionary
        }
    }

    Begin {
        # Always start from $null so a same-named variable in a caller's scope cannot leak in
        # when -Consistent (and therefore -OutConvertionTable) is not used.
        $OutConvertionTable = $null
        if ($Consistent) {
            # Dynamic parameters are not bound to variables; read the value from $PSBoundParameters.
            $OutConvertionTable = $PSBoundParameters['OutConvertionTable']
            # Maps each original matched value to the replacement generated for it.
            $ConvertionTable = @{}
            # Seed handed to the rule for the next value that is not yet in the table.
            $Uniqueness = 0
        }

        #region Write-Progress calculation block initialization
        $PercentComplete = 0
        $PercentStep = 100 / $TotalLines
        [double]$AverageTime = 0
        [int]$SecondsRemaining = $AverageTime * $TotalLines
        $StopWatch = [System.Diagnostics.Stopwatch]::new()
        $StopWatch.Start()
        #endregion

        # Zero-based index of the pipeline item currently being processed.
        $LineNumber = 0
    }

    Process {
        $CurrentString = $InputObject.ToString()
        $CurrentStringChanged = $false

        foreach ($Rule in $RedactionRule) {
            # Stored in $RuleMatches rather than $Matches, which is a PowerShell automatic variable.
            # Sort Descending is required so the replacements won't overwrite each other
            $RuleMatches = Select-String -InputObject $CurrentString -Pattern $Rule.Pattern -AllMatches | Select-Object -ExpandProperty Matches | Sort-Object -Property Index -Descending
            if ($RuleMatches) {
                $CurrentStringChanged = $true
                $StrSB = New-Object System.Text.StringBuilder($CurrentString)
                Foreach ($Match in $RuleMatches) {
                    $MatchedValue = $Match.Value

                    'MatchedValue = {0}' -f $MatchedValue | Write-Verbose

                    if ($Consistent) {
                        if ($null -eq $ConvertionTable[$MatchedValue]) {
                            # MatchedValue doesn't exist in the ConvertionTable
                            # Adding MatchedValue to the ConvertionTable, generated from the current uniqueness value
                            $ConvertionTable[$MatchedValue] = $Rule.Evaluate($Uniqueness)
                            'Adding new value to the convertion table: $ConvetionTable[{0}] = {1}' -f $MatchedValue, $ConvertionTable[$MatchedValue] | Write-Verbose 
                            $Uniqueness++
                        }

                        # This MatchedValue exists, use it.
                        $Replacement = $ConvertionTable[$MatchedValue]
                    }
                    else {
                        # Without -Consistent the replacement is derived from the current line number.
                        $Replacement = $Rule.Evaluate($LineNumber)
                    }

                    # Swap the matched span for its replacement in place.
                    $null = $StrSB.Remove($Match.Index, $Match.Length)
                    $null = $StrSB.Insert($Match.Index, $Replacement)
                }

                # Later rules operate on the output of earlier ones.
                $CurrentString = $StrSB.ToString()
            }
        } # foreach($Rule in $RedactionRule)

        if ($AsObject) {
            $OutputProperties = @{
                LineNumber    = $LineNumber
                CurrentString = $CurrentString
                Original      = $InputObject
                Changed       = $CurrentStringChanged
            }

            # Select-Object with this list fixes the property order of the emitted object.
            $OutputPropertiesList = 'LineNumber', 'CurrentString', 'Original', 'Changed'

            if ($Consistent) {
                $OutputProperties['Uniqueness'] = $Uniqueness
                $OutputPropertiesList += 'Uniqueness'
            }

            New-Object -TypeName PSCustomObject -Property $OutputProperties | Select-Object $OutputPropertiesList
        }
        else {
            $CurrentString
        }

        #region Write-Progress calculation block
        if ($TotalLines -gt $LineNumber) {
            $PercentComplete += $PercentStep
            $ElapsedSeconds = $StopWatch.Elapsed.TotalSeconds
            $StopWatch.Restart()
            # Running average of the per-item processing time.
            [double]$AverageTime = ($AverageTime * $LineNumber + $ElapsedSeconds) / ($LineNumber + 1)
            # The current item is already done, so ($LineNumber + 1) items are behind us.
            [int]$SecondsRemaining = $AverageTime * ($TotalLines - $LineNumber - 1)
            # Four placeholders for four arguments (the elapsed time used to be printed under the "Remain(S)" label).
            'L = {0} | Avg = {1} | Elapsed(S) = {2} | Remain(S) = {3}' -f $LineNumber, $AverageTime, $ElapsedSeconds, $SecondsRemaining | Write-Debug
        }

        Write-Progress -Activity "Redacting sensitive data. Line Number: $LineNumber out of $TotalLines" -Id 2 -ParentId 1 -PercentComplete $PercentComplete -SecondsRemaining $SecondsRemaining
        #endregion

        $LineNumber++
    } # Process

    end {
        #region Write-Progress calculation block closing
        $StopWatch.Stop()        
        Write-Progress -Activity "[Done] Redacting sensitive data [Done]" -Id 2 -ParentId 1 -Completed
        #endregion

        # Publish the conversion table under the caller-supplied variable name, if one was given.
        if (-not [string]::IsNullOrWhiteSpace($OutConvertionTable)) {
            '$PSCmdlet.MyInvocation.CommandOrigin: {0}' -f $PSCmdlet.MyInvocation.CommandOrigin | Write-Debug
            if ($PSCmdlet.MyInvocation.CommandOrigin -eq 'Runspace') {
                $PSCmdlet.SessionState.PSVariable.Set($OutConvertionTable, $ConvertionTable)
            }
            else {
                # CommandOrigin: Internal
                # NOTE(review): the relative scope number 2 presumably targets a scope the calling
                # function can read through dynamic scoping - confirm before changing the call depth.
                Set-Variable -Name $OutConvertionTable -Value $ConvertionTable -Scope 2
            }
        }
    }
}
<#
.SYNOPSIS
Creates new redaction rule.
 
.DESCRIPTION
Creates new redaction rule with regex pattern to look for and NewValue to replace with.
 
.PARAMETER Pattern
Regex pattern
 
.PARAMETER NewValueFunction
Script block to generate new generic data, the result is then put instead of the original value.
This script block can accept at most 1 int parameter with $args[0] or declare variable in param() block
 
.PARAMETER NewValueString
String value that replaces the matched pattern. The string can contain the placeholder {0}, which is replaced with the uniqueness factor.
 
.PARAMETER CommonRule
Predefined rules - patterns and values
 
.EXAMPLE
New-RedactionRule '(?<=\().*(?=\))' 'Process_{0}'
 
.EXAMPLE
Mark '[a-z]' { [long]$p = $args[0]; [char]($p % 26 + 65) }
 
.EXAMPLE
Mark -CommonRule IPV4Address
 
.NOTES
 
#>

Function New-RedactionRule {
    [Diagnostics.CodeAnalysis.SuppressMessageAttribute("PSUseShouldProcessForStateChangingFunctions", "")]
    [Alias('New-SanitizationRule','New-MarkingRule','Mark')] # Usually Single word is an automatic alias for Get-<SingleWord>
    [OutputType([RedactionRule])]
    [CmdletBinding(DefaultParameterSetName = 'CustomFunction')]
    param(
        # Regex pattern with 1 named capturing group at most
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomString')]
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'CustomFunction')]
        [string]$Pattern,
        # Script block that produces the replacement value
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomFunction')]
        [scriptblock]$NewValueFunction,
        # Value can contain {0} so counter value will be added
        [Parameter(Mandatory = $true, Position = 1, ParameterSetName = 'CustomString')]
        [String]$NewValueString,
        # Name of a predefined rule stored in $Script:CommonRuleTable
        [Parameter(Mandatory = $true, Position = 0, ParameterSetName = 'Common')]
        [ValidateSet('IPV4Address')]   
        [string]$CommonRule
    )

    # The bound parameter set decides where the rule comes from.
    switch ($PSCmdlet.ParameterSetName) {
        'Common' {
            # Predefined rule looked up by name.
            $Script:CommonRuleTable[$CommonRule]
        }
        'CustomFunction' {
            # Rule whose replacement is generated by a script block.
            New-Object -TypeName RedactionRule -ArgumentList $Pattern, $NewValueFunction
        }
        'CustomString' {
            # Rule whose replacement is a format string.
            New-Object -TypeName RedactionRule -ArgumentList $Pattern, $NewValueString
        }
    }
}

# Predefined rules, keyed by name, that New-RedactionRule returns for its -CommonRule parameter.
$Script:CommonRuleTable = @{}
$Script:CommonRuleTable['IPV4Address'] = New-RedactionRule -Pattern '\b(\d{1,3}(\.\d{1,3}){3})\b' -NewValueFunction ${Function:Convert-IPValue}

# Disabled rule drafts, kept for reference (written as hashtable entries).
# NOTE(review): they reference ${Function:Generate-IPValue}, which is not defined in the visible part of this file.
#'IPV6Address' = New-RedactionRule -Pattern '\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b' -NewValueFunction ${Function:Generate-IPValue}
#'MACAddress' = New-RedactionRule -Pattern '\b([0-9A-F]{2}[:-]){5}([0-9A-F]{2})\b' -NewValueFunction ${Function:Generate-IPValue}
#'GUID' = New-RedactionRule -Pattern '\b[{(]?[0-9A-F]{8}[-]?(?:[0-9A-F]{4}[-]?){3}[0-9A-F]{12}[)}]?\b' -NewValueFunction ${Function:Generate-IPValue}

# Export every function, alias and cmdlet defined by the module.
Export-ModuleMember -Function * -Alias * -Cmdlet *