## Get-Base64RegularExpression.ps1

<#PSScriptInfo
.VERSION 1.2
.GUID 12211d8d-6460-461b-b544-85556f82b140
.AUTHOR Lee Holmes
.DESCRIPTION Get a regular expression that can be used to search for content that has been Base64 encoded
#>


param(
    ## The value to search for in Base64 encoded content. The script handles
    ## two input types: a [String], which is converted to bytes using the
    ## encoding selected by -Unicode, and a [Byte[]], which is used as-is.
    ## Accepts pipeline input; each piped value is processed separately.
    [Parameter(Mandatory, ValueFromPipeline)]
    $Value,

    ## When set, string input is encoded with [System.Text.Encoding]::Unicode
    ## (UTF-16LE). Otherwise [System.Text.Encoding]::Default is used. Has no
    ## effect on byte array input.
    [Parameter()]
    [Switch] $Unicode,

    ## When set, emit each distinct Base64 sequence as its own string instead
    ## of combining them into a single regular expression.
    [Parameter()]
    [Switch] $Raw
)

begin
{
    ## Accumulates the searchable Base64 sequences produced for every input
    ## value received by the process block; consumed once in the end block.
    $base64sequences = @()
}

process
{
    ## Gather the byte representation(s) of the current input value. Strings
    ## are encoded to bytes; byte arrays are searched for as-is.
    $byteRepresentations = @()

    if($Value -is [String])
    {
        ## -Unicode selects UTF-16LE ([System.Text.Encoding]::Unicode);
        ## otherwise the platform's default encoding is used.
        $encoding = [System.Text.Encoding]::Default
        if($Unicode.IsPresent)
        {
            $encoding = [System.Text.Encoding]::Unicode
        }

        ## The unary comma keeps the byte array as a single element rather
        ## than flattening its bytes into $byteRepresentations.
        $byteRepresentations += ,([Byte[]] $encoding.GetBytes($Value))
    }
    elseif($Value -is [byte[]])
    {
        $byteRepresentations += ,$Value
    }
    else
    {
        ## Anything else (for example an Object[] of integers, or the single
        ## bytes of a byte array that was enumerated by the pipeline) yields
        ## no sequences. Say so, because an expression built from zero
        ## sequences is "()", which matches everything.
        Write-Warning ("Ignoring value of unsupported type [{0}]; expected a [String] or a [Byte[]]. " +
            "To pipe a byte array, wrap it with the unary comma: ,`$bytes" -f $Value.GetType().FullName)
    }

    ## Find the safe searchable sequences for each Base64 representation of the
    ## input bytes. Base64 maps every 3 input bytes to 4 characters, so the text
    ## produced depends on where the value starts relative to a 3-byte boundary.
    ## The @() wrapper guarantees an array (possibly empty) is appended, never $null.
    $base64sequences += @(foreach($bytes in $byteRepresentations)
    {
        ## Offset 0. Sits on a 3-byte boundary so we can trust the leading characters.
        $offset0 = [Convert]::ToBase64String($bytes)

        ## Offset 1. Has one byte from preceding content, so the first 2 characters
        ## carry bits of that unknown byte and must be thrown away.
        $offset1 = [Convert]::ToBase64String( (New-Object 'Byte[]' 1) + $bytes ).Substring(2)

        ## Offset 2. Has two bytes from preceding content, so the first 3 characters
        ## carry bits of those unknown bytes and must be thrown away.
        $offset2 = [Convert]::ToBase64String( (New-Object 'Byte[]' 2) + $bytes ).Substring(3)

        ## If there is terminating padding, the character just before it mixes our
        ## final bits with whatever content would follow, so remove the padding
        ## plus that one character. Strings without padding pass through unchanged.
        ## Finally drop candidates that ended up empty.
        $offset0, $offset1, $offset2 |
            ForEach-Object { $_ -replace '.=+$' } |
            Where-Object { $_ }
    })
}

end
{
    ## Base64 is case-sensitive ("QUJD" and "qujd" encode different bytes), but
    ## Sort-Object -Unique compares case-insensitively unless -CaseSensitive is
    ## given, which would silently discard distinct sequences. Null or empty
    ## entries are dropped so [Regex]::Escape never receives one.
    $uniqueSequences = @(
        $base64sequences |
            Where-Object { $_ } |
            Sort-Object -Unique -CaseSensitive
    )

    if($Raw.IsPresent)
    {
        ## Emit each distinct sequence as its own string
        $uniqueSequences
    }
    else
    {
        ## Output a regular expression that matches any of these sequences.
        ## Escaping is required because '+' is part of the Base64 alphabet.
        ## Callers should apply the expression case-sensitively (e.g. -cmatch).
        $alternatives = $uniqueSequences | ForEach-Object { [Regex]::Escape($_) }
        "(" + ($alternatives -join "|") + ")"
    }
}