Private/Utils/Get-FileEncoding.ps1

<#
    .SYNOPSIS
        Guess encoding of text file

    .DESCRIPTION
        Reads the whole file into memory and inspects it in two stages.

        1. Byte order mark. BOMs for UTF-8, UTF-32 (LE and BE) and UTF-16
           (LE and BE) are recognised. UTF-32 is tested before UTF-16 because
           the UTF-32 LE BOM (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE).

        2. No BOM. The content is validated as UTF-8, sequence by sequence:
           - If every byte is 7-bit and the only control characters are
             TAB, LF and CR, the result is ASCII.
           - If the content is valid UTF-8 and contains a multi-byte sequence
             (or a control character other than TAB/LF/CR), the result is
             UTF-8 without BOM.
           - If an invalid lead byte, invalid continuation byte, overlong
             form, surrogate or out-of-range code point is found, the file is
             assumed to be UTF-16 little-endian without BOM.

    .PARAMETER Path
        Path to file to examine

    .OUTPUTS
        [System.Text.Encoding] object of detected encoding

    .NOTES
        Throws when the path is not an existing file, and when the file ends
        part-way through a multi-byte UTF-8 sequence.

    .LINK
        https://unicodebook.readthedocs.io/guess_encoding.html
#>

function Get-FileEncoding
{
    param
    (
        [string]$Path
    )

    if (-not (Test-Path -Path $Path -PathType Leaf))
    {
        throw "File not found: $Path"
    }

    $bytes = [IO.File]::ReadAllBytes((Resolve-Path -Path $Path).Path)

    # ---- 1. Check BOM -------------------------------------------------------

    if ($bytes.Length -ge 3 -and $bytes[0] -eq 0xEF -and $bytes[1] -eq 0xBB -and $bytes[2] -eq 0xBF)
    {
        # UTF-8 with BOM
        return New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $true
    }

    # UTF-32 must be tested before UTF-16: FF FE 00 00 also matches FF FE.
    if ($bytes.Length -ge 4)
    {
        if ($bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE -and $bytes[2] -eq 0x00 -and $bytes[3] -eq 0x00)
        {
            # UTF-32 LE (bigEndian = false, byteOrderMark = true)
            return New-Object -TypeName System.Text.UTF32Encoding -ArgumentList $false, $true
        }

        if ($bytes[0] -eq 0x00 -and $bytes[1] -eq 0x00 -and $bytes[2] -eq 0xFE -and $bytes[3] -eq 0xFF)
        {
            # UTF-32 BE (bigEndian = true, byteOrderMark = true)
            return New-Object -TypeName System.Text.UTF32Encoding -ArgumentList $true, $true
        }
    }

    if ($bytes.Length -ge 2)
    {
        if ($bytes[0] -eq 0xFF -and $bytes[1] -eq 0xFE)
        {
            # UTF-16 LE (bigEndian = false, byteOrderMark = true)
            return New-Object -TypeName System.Text.UnicodeEncoding -ArgumentList $false, $true
        }

        if ($bytes[0] -eq 0xFE -and $bytes[1] -eq 0xFF)
        {
            # UTF-16 BE (bigEndian = true, byteOrderMark = true)
            return New-Object -TypeName System.Text.UnicodeEncoding -ArgumentList $true, $true
        }
    }

    # ---- 2. No BOM: validate the content as UTF-8 ---------------------------

    # Returned when the content is not valid UTF-8.
    # Unicode - going to assume LE as windows and most linux run on x86 architecture
    $nonUtf8Fallback = New-Object -TypeName System.Text.UnicodeEncoding -ArgumentList $false, $false

    # Smallest code point that legitimately needs a sequence of the given
    # length; anything smaller is an overlong (invalid) encoding.
    $minimumCodePoint = @{ 2 = 0x80; 3 = 0x800; 4 = 0x10000 }

    $isUnicode = $false
    $i = 0

    while ($i -lt $bytes.Length)
    {
        $byte = $bytes[$i]

        if ($byte -lt 0x80)
        {
            # 1 byte sequence: U+0000..U+007F
            if ($byte -lt 32 -and (9, 10, 13) -notcontains $byte)
            {
                # CTRL char and not whitespace: do not report plain ASCII
                $isUnicode = $true
            }

            ++$i
            continue
        }

        $isUnicode = $true

        if ($byte -ge 0xC2 -and $byte -le 0xDF)
        {
            # 0b110xxxxx: 2 bytes sequence (0xC0/0xC1 would be overlong)
            $codeLength = 2
        }
        elseif ($byte -ge 0xE0 -and $byte -le 0xEF)
        {
            # 0b1110xxxx: 3 bytes sequence
            $codeLength = 3
        }
        elseif ($byte -ge 0xF0 -and $byte -le 0xF4)
        {
            # 0b11110xxx: 4 bytes sequence (above 0xF4 exceeds U+10FFFF)
            $codeLength = 4
        }
        else
        {
            # Stray continuation byte or a byte that never occurs in UTF-8
            return $nonUtf8Fallback
        }

        if ($i + $codeLength -gt $bytes.Length)
        {
            # truncated string or invalid byte sequence
            throw "Invalid text file format - cannot determine encoding"
        }

        # Payload bits of the lead byte: 0x1F, 0x0F, 0x07 for lengths 2, 3, 4.
        $ch = [int]$byte -band (0xFF -shr ($codeLength + 1))

        # Check continuation bytes: bit 7 should be set, bit 6 should be
        # unset (0b10xxxxxx). The parentheses are required because -band
        # binds less tightly than -ne.
        for ($j = 1; $j -lt $codeLength; ++$j)
        {
            $next = [int]$bytes[$i + $j]

            if (($next -band 0xC0) -ne 0x80)
            {
                return $nonUtf8Fallback
            }

            $ch = ($ch -shl 6) -bor ($next -band 0x3F)
        }

        if ($ch -lt $minimumCodePoint[$codeLength] -or $ch -gt 0x10FFFF)
        {
            # Overlong encoding, or beyond the Unicode range
            return $nonUtf8Fallback
        }

        if ($ch -ge 0xD800 -and $ch -le 0xDFFF)
        {
            # UTF-16 surrogates are not valid in UTF-8
            return $nonUtf8Fallback
        }

        # Step over the whole sequence so continuation bytes are not
        # re-examined as lead bytes.
        $i += $codeLength
    }

    # If we make it here, then UTF8 (unicode) no BOM or ASCII
    if ($isUnicode)
    {
        return New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false, $false
    }

    return New-Object -TypeName System.Text.ASCIIEncoding
}