public/ConvertTo-SafeEntities.ps1
|
<#
.SYNOPSIS
Encode text as XML/HTML, escaping all characters outside 7-bit ASCII.

.INPUTS
System.String of HTML or XML data to encode.

.OUTPUTS
System.String of HTML or XML data, encoded.

.FUNCTIONALITY
Unicode

.LINK
https://docs.microsoft.com/dotnet/api/system.char.issurrogatepair

.LINK
https://docs.microsoft.com/dotnet/api/system.char.converttoutf32

.EXAMPLE
"$([char]0xD83D)$([char]0xDCA1) File $([char]0x2192) Save" |ConvertTo-SafeEntities

&#x1F4A1; File &#x2192; Save

This shows a UTF-16 surrogate pair, used internally by .NET strings, which is combined
into a single entity reference.

.EXAMPLE
"ETA: $([char]0xBD) hour" |ConvertTo-SafeEntities

ETA: &#xBD; hour
#>

#Requires -Version 3
[CmdletBinding()][OutputType([string])] Param(
<#
An HTML or XML string that may include emoji or other Unicode characters outside
the 7-bit ASCII range.
#>
[Parameter(Position=0,Mandatory=$true,ValueFromPipeline=$true)][string] $InputObject,
# Indicates that markup characters should also be escaped.
[switch] $IncludeMarkupChars
)
Process
{
    # With -IncludeMarkupChars the whole string is handed to the framework HTML encoder
    # and nothing below runs for this pipeline item.
    # NOTE(review): Text.Encodings.Web.HtmlEncoder may not be loadable on every host that
    # satisfies "#Requires -Version 3" (e.g. Windows PowerShell) - confirm before relying on it.
    if($IncludeMarkupChars)
    {
        return [Text.Encodings.Web.HtmlEncoder]::Default.Encode($InputObject)
    }

    # Otherwise walk the UTF-16 code units and build the encoded text piece by piece.
    $encoded = New-Object Text.StringBuilder
    $index = 0
    while($index -lt $InputObject.Length)
    {
        [int] $codeUnit = [char]$InputObject[$index]
        Write-Verbose "$index : $codeUnit"
        if([char]::IsSurrogatePair($InputObject,$index))
        {
            # A high+low surrogate pair becomes one reference to the combined code point.
            $codePoint = [char]::ConvertToUtf32($InputObject,$index)
            [void]$encoded.Append(('&#x{0:X};' -f $codePoint))
            # Step over the low surrogate; the shared increment below moves past the pair.
            $index++
        }
        elseif($codeUnit -gt 0x7F)
        {
            # Any other code unit above 0x7F gets a hexadecimal character reference.
            [void]$encoded.Append(('&#x{0:X};' -f $codeUnit))
        }
        else
        {
            # 0x00-0x7F is copied through unchanged.
            [void]$encoded.Append([char]$codeUnit)
        }
        $index++
    }
    return $encoded.ToString()
}
|