loader.psm1

# Sentinel encoding type: Get-PsOneEncoding hands an instance of it to
# StreamReader as the fallback encoding. If the reader still reports that
# instance after reading, no BOM was found in the file.
class NullEncoder : System.Text.UTF8Encoding {}

function Get-PsOneEncoding
{
  <#
      .SYNOPSIS
      Gets Encoding for BOM and Non-BOM text files.
 
      .DESCRIPTION
      Returns the encoding of text files.
      For BOM-encoded files, the fast .NET methods are used, and confidence level is always 100%.
      For Non-BOM-encoded files, extensive heuristics are applied, and confidence level varies depending on file content.
      Heuristics are calculated by a porting of the Mozilla Universal Charset Detector (https://github.com/errepi/ude)
      Important: this library is subject to the Mozilla Public License Version 1.1, alternatively licensed
      either under terms of GNU General Public License Version 2 or later, or GNU Lesser General Public License Version 2.1 or later.
 
      .PARAMETER Path
      Path to text file. Relative paths are resolved against the current PowerShell location.
 
      .PARAMETER BomOnly
      Returns information for BOM-encoded files only.
 
      .OUTPUTS
      One custom object per analyzed file with the properties BOM, Encoding, Confidence and Path.
      Files that cannot be read produce a non-terminating error and no output object.
 
      .EXAMPLE
      Get-PsOneEncoding -Path c:\sometextfile.txt
      Returns the encoding of the text file specified
 
      .EXAMPLE
      Get-ChildItem -Path $home -Filter *.txt -Recurse | Get-PsOneEncoding
      Returns the encoding of any text file found anywhere in the current user profile.
 
      .NOTES
      Make sure you respect the license terms of the ported charset detector DLL.
 
      .LINK
      https://github.com/TobiasPSP/GetEncoding
      https://github.com/errepi/ude
      https://techblog.dorogin.com/changing-source-files-encoding-and-some-fun-with-powershell-df23bf8410ab
  #>

  [CmdletBinding()]
  param
  (
    [Parameter(ValueFromPipeline,ValueFromPipelineByPropertyName,Mandatory)]
    [Alias('FullName')]
    [string]
    $Path,
    
    [switch]
    $BomOnly
  )    
  begin
  {
    # load charset detector dll:
    Add-Type -Path $PSScriptRoot\Ude.dll
    $cdet = [Ude.CharsetDetector]::new()
    $nullEncoder = [NullEncoder]::new()
  }
  process 
  {
    try
    {
      # .NET APIs resolve relative paths against the process working directory,
      # which can differ from the current PowerShell location, so translate the
      # path to an absolute provider path first:
      $fullPath = $PSCmdlet.GetUnresolvedProviderPathFromPSPath($Path)

      # try and read the BOM encoding:
      # submit a dummy encoder class that is used if the encoding cannot be
      # determined from BOM. This way we know that additional heuristic
      # analysis is needed:
      $reader = [System.IO.StreamReader]::new($fullPath,$nullEncoder,$true)
      try
      {
        # must read the file at least once to get encoding:
        $null = $reader.Peek()
        $encoding = $reader.CurrentEncoding
      }
      finally
      {
        # release the file handle even if reading failed:
        $reader.Dispose()
      }

      # if the encoding equals default encoding then there was no bom:
      $bom = $encoding -ne $nullEncoder
      $bodyname = $encoding.BodyName
      $confidence = 100
    
      # if there was no bom and non-bom files were not excluded...
      if ((-not $bom) -and (-not $BomOnly.IsPresent))
      {
        # ...do a heuristic analysis based on file content:
        $stream = [System.IO.File]::OpenRead($fullPath)
        try
        {
          # the detector instance is shared by all pipeline items, so clear
          # the state left behind by the previous file before feeding new data:
          $cdet.Reset()
          $cdet.Feed($stream)
          $cdet.DataEnd()
          $bodyname = $cdet.Charset
          $confidence = [int]($cdet.Confidence * 100)
        }
        finally
        {
          $stream.Dispose()
        }

        # add a workaround for the awkward default encoding created
        # by Set-Content on Windows PowerShell:
        if ($confidence -eq 0 -and $null -eq $bodyname)
        {
          $confidence = 25
          $bodyname = 'ANSI'
        }
      }
    
      # return findings as a custom object:
      if ($bom -or !$BomOnly.IsPresent)
      {
        [PSCustomObject]@{
          BOM          = $bom
          Encoding     = $bodyName.ToUpper()
          Confidence   = $confidence
          Path = $Path
        }
      }
    }
    catch
    {
      # report the failure for this file and continue with the next pipeline
      # item instead of emitting values left over from a previous file:
      Write-Error -ErrorRecord $_
      return
    }
  }
}

# define Get-Encoding as a shorter alias for Get-PsOneEncoding:
Set-Alias -Name Get-Encoding -Value Get-PsOneEncoding