Unicodery.psm1


function Add-ScopeLevel
{
<#
.SYNOPSIS
Convert a scope level to account for another call stack level.
 
.DESCRIPTION
For scripts that need to get or set a variable of a specific scope so that it disappears at
the end of a block/function/script, or so that it persists globally, this calculates the
additional call level added by that script.
 
.INPUTS
System.String containing the desired level.
 
.OUTPUTS
System.String containing the calculated level (Global or an integer).
 
.LINK
Stop-ThrowError
 
.LINK
Get-PSCallStack
 
.LINK
about_Scopes
 
.FUNCTIONALITY
PowerShell
 
.EXAMPLE
Add-ScopeLevel Local
 
1
 
.EXAMPLE
Add-ScopeLevel 3
 
4
 
.EXAMPLE
Add-ScopeLevel Global
 
Global
#>


[CmdletBinding()][OutputType([string])] Param(
# The requested scope from the caller of the caller of this script.
# Global, Local, Private, Script, or a positive integer.
[Parameter(Position=0,Mandatory=$true,ValueFromPipeline=$true)][string] $Scope,
# The scope will be used within the module, rather than the module's caller.
[switch] $Internal
)
Process
{
    $offset = $Internal ? 1 : 3
    if($Scope -match '\A\d+\z') {return "$($offset+[int]$Scope)"}
    switch($Scope)
    {
        Global  {return 'Global'}
        # the module scope seems to implicitly add a level
        Local   {return "$offset"}
        Private {return "$offset"}
        Script
        {
            $stack = Get-PSCallStack
            for($i = $offset+1; $i -lt $stack.Length; $i++)
            {
                if($stack[$i].Command -and $stack[$i].FunctionName -like '<ScriptBlock>*') {return "$($offset+$i-2)"}
            }
            throw 'Unable to find Script scope'
        }
    }
}

}

function Get-UnicodeData
{
<#
.SYNOPSIS
Returns the current (cached) Unicode character data.
 
.OUTPUTS
System.Management.Automation.PSCustomObject for each character entry with these properties:
* BidirectionalCategory
* Catgory
* CombiningClass
* Comment
* DecimalDigitValue
* DecompositionMapping
* DigitValue
* Lower
* Mirrored
* Name
* NumericValue
* OldName
* Title
* Upper
* Value
 
.FUNCTIONALITY
Unicode
 
.LINK
https://www.unicode.org/L2/L1999/UnicodeData.html
 
.EXAMPLE
Get-UnicodeData |Export-Csv data/UnicodeData.csv
 
Saves the current Unicode data as a CSV file.
#>


[CmdletBinding()][OutputType([pscustomobject])] Param(
# The source location of the latest Unicode data.
[uri] $Url = 'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt',
# The local location to cache the data to.
[string] $DataFile = (Join-Path ([io.path]::GetTempPath()) ($Url.Segments[-1]))
)

function Save-Data
{
    [CmdletBinding()] Param()
    if(!(Test-Path $DataFile -Type Leaf))
    {
        $http = Invoke-WebRequest $Url -OutFile $DataFile -PassThru
        Write-Information "Downloaded $Url to $DataFile"
        [datetime] $lastmod = "$($http.Headers['Last-Modified'])"
        (Get-Item $DataFile).LastWriteTime = $lastmod
    }
    else
    {
        $http = Invoke-WebRequest $Url -Method Head
        [datetime] $lastmod = "$($http.Headers['Last-Modified'])"
        if((Get-Item $DataFile).LastWriteTime -lt $lastmod)
        {
            Invoke-WebRequest $Url -OutFile $DataFile
            Write-Information "Updated $Url to $(Join-Path $PWD $DataFile)"
            (Get-Item $DataFile).LastWriteTime = $lastmod
        }
    }
}

function Read-Data
{
    [CmdletBinding()] Param()
    Import-Csv $DataFile -Delimiter ';' -Header Value,Name,Catgory,CombiningClass,BidirectionalCategory,
        DecompositionMapping,DecimalDigitValue,DigitValue,NumericValue,Mirrored,OldName,Comment,
        Upper,Lower,Title
}

Save-Data
Read-Data

}

function Get-CharacterDetails
{
<#
.SYNOPSIS
Returns filterable categorical information about characters in the Unicode Basic Multilingual Plane.
 
.INPUTS
System.String to get details on each character of.
 
.OUTPUTS
System.Management.Automation.PSCustomObject with the following properties:
 
Character
  The character these details apply to.
 
Value
  The integer codepoint value of the character.
 
CodePoint
  The Unicode code point, U+9999 formatted.
 
UnicodeBlock
  The Unicode (not .NET) block the character falls into.
 
MatchesBlock
  True if the character matches the \p{IsUnicodeBlock} regular expression
  (where "UnicodeBlock" is the character's UnicodeBlock property).
  Error if the character's UnicodeBlock property is not supported by .NET.
 
UnicodeCategory
  The .NET UnicodeCategory returned by System.Char.GetUnicodeCategory().
 
CategoryClasses
  The list of Unicode general category classes that will match the character.
 
PasswordCategory
  The passfilt.dll category of the character:
  Uppercase, Lowercase, Caseless, Digit, or Special.
  ActiveDirectory complexity rules typically require a character from at least
  three of these fairly arbitrary categories.
 
XmlEscape
  The result of XML-encoding the character using
  System.Security.SecurityElement.Escape().
 
HtmlAttributeEncode
  The result of HTML-encoding the character using
  System.Web.HttpUtility.HtmlAttributeEncode().
 
UrlEncode
  The result of URL-encoding the character using
  System.Net.WebUtility.UrlEncode().
 
HttpUrlEncode
  The result of URL-encoding a string containing the character using the venerable
  System.Web.HttpUtility.UrlEncode().
 
UrlEncodeUnicode
  The result of URL-encoding the character using the deprecated
  System.Web.HttpUtility.UrlEncodeUnicode().
  This is the only URL-encoding method in .NET that seems to support encoding
  characters to the %uFFFF syntax, rather than trying to encode characters into
  individual UTF-8 bytes and URL-encoding each of those.
 
EscapeDataString
  The result of URL-encoding the character using System.Uri.EscapeDataString(),
  or the name of the exception thrown, usually MethodInvocationException for surrogates.
 
EscapeUriString
  The result of URL-encoding the character using System.Uri.EscapeUriString(),
  or the name of the exception thrown, usually MethodInvocationException for surrogates.
 
UrlPathEncode
  The result of URL-encoding the character using
  System.Web.HttpUtility.UrlPathEncode().
 
IsControl
  The value returned by System.Char.IsControl().
  Indicates whether the specified Unicode character is categorized as a control character.
  When true, the character should match \p{C} in regular expressions.
 
IsDigit
  The value returned by System.Char.IsDigit().
  Indicates whether the specified Unicode character is categorized as a decimal digit.
  When true, the character should match \p{Nd} or \d in regular expressions.
 
IsHighSurrogate
  The value returned by System.Char.IsHighSurrogate().
  Indicates whether the specified Char object is a high surrogate.
  Surrogates are used to compose supplementary characters outside the Basic Multilingual
  Plane (BMP, the first 65,536 Unicode codepoints).
 
IsLegalUserName
  True if the character is valid in a Windows username.
 
IsLegalFileName
  True if the character is valid in a Windows path.
 
IsLetter
  The value returned by System.Char.IsLetter().
  Indicates whether the specified Unicode character is categorized as a Unicode letter.
  When true, the character should match \p{L} in regular expressions.
 
IsLetterOrDigit
  The value returned by System.Char.IsLetterOrDigit().
  Indicates whether the specified Unicode character is categorized as a letter or a decimal digit.
 
IsLower
  The value returned by System.Char.IsLower().
  Indicates whether the specified Unicode character is categorized as a lowercase letter.
  When true, the character should match \p{Ll} in regular expressions.
 
IsLowSurrogate
  The value returned by System.Char.IsLowSurrogate().
  Indicates whether the specified Char object is a low surrogate.
  Surrogates are used to compose supplementary characters outside the Basic Multilingual
  Plane (BMP, the first 65,536 Unicode codepoints).
 
IsMark
  True if the character matches the regular expression \p{M}.
  This indicates the character is categorized as a diacritic mark.
 
IsNumber
  The value returned by System.Char.IsNumber().
  Indicates whether the specified Unicode character is categorized as a number.
  When true, the character should match \p{N} in regular expressions.
 
IsPunctuation
  The value returned by System.Char.IsPunctuation().
  Indicates whether the specified Unicode character is categorized as a punctuation mark.
  When true, the character should match \p{P} in regular expressions.
 
IsSeparator
  The value returned by System.Char.IsSeparator().
  Indicates whether the specified Unicode character is categorized as a separator character.
  When true, the character should match \p{Z} in regular expressions.
 
IsSurrogate
  The value returned by System.Char.IsSurrogate().
  Indicates whether the specified character has a surrogate code unit.
  Surrogates are used to compose supplementary characters outside the Basic Multilingual
  Plane (BMP, the first 65,536 Unicode codepoints).
  When true, the character should match \p{Cs} in regular expressions.
 
IsSymbol
  The value returned by System.Char.IsSymbol().
  Indicates whether the specified Unicode character is categorized as a symbol character.
  When true, the character should match \p{S} in regular expressions.
 
IsUpper
  The value returned by System.Char.IsUpper().
  Indicates whether the specified Unicode character is categorized as an uppercase letter.
  When true, the character should match \p{Lu} in regular expressions.
 
IsWhiteSpace
  The value returned by System.Char.IsWhiteSpace().
  Indicates whether the specified Unicode character is categorized as white space.
  When true, the character should match \p{Zs} or \s in regular expressions.
 
IsWord
  True if the character matches the regular expression \w.
  This indicates the character is categorized as a "word" (alphanumeric) character,
  including:
 
* L All letters, including:
  * Ll Letter, lowercase
  * Lu Letter, uppercase
  * Lt Letter, titlecase
  * Lo Letter, other
  * Lm Letter, modifier
* Nd Number, decimal digit
* Pc Punctuation, connector (includes _)
 
.FUNCTIONALITY
Unicode
 
.COMPONENT
System.Web
 
.LINK
http://unicode.org/
 
.LINK
https://msdn.microsoft.com/library/system.char.aspx
 
.LINK
https://msdn.microsoft.com/library/system.uri.aspx
 
.LINK
https://msdn.microsoft.com/library/system.globalization.unicodecategory.aspx
 
.LINK
https://msdn.microsoft.com/library/windows/desktop/ms722458.aspx
 
.LINK
https://msdn.microsoft.com/library/system.net.webutility.aspx
 
.LINK
https://msdn.microsoft.com/library/system.web.httputility.aspx
 
.LINK
https://msdn.microsoft.com/library/20bw873z.aspx
 
.LINK
https://msdn.microsoft.com/library/windows/desktop/dd374069.aspx
 
.LINK
https://technet.microsoft.com/library/bb726984.aspx
 
.LINK
https://msdn.microsoft.com/library/system.io.path.getinvalidfilenamechars.aspx
 
.LINK
https://docs.microsoft.com/dotnet/core/compatibility/3.1-5.0#unicode-category-changed-for-some-latin-1-characters
 
.EXAMPLE
Get-CharacterDetails ASCII |Out-GridView
 
Learn everything about 7-bit ASCII, the first 128 characters in the Unicode standard.
 
.EXAMPLE
Get-CharacterDetails GeneralPunctuation -IsSymbol
 
Returns the two characters in the GeneralPunctuation block categorized as symbols.
 
.EXAMPLE
Get-CharacterDetails ASCII -IsWord -NotLetter -NotDigit
 
 
Character : _
Value : 95
CodePoint : U+005F
UnicodeBlock : BasicLatin
MatchesBlock : True
UnicodeCategory : ConnectorPunctuation
CategoryClasses : {Pc, P}
XmlEncode : _
HtmlAttributeEncode : _
UrlEncode : _
HttpUrlEncode : _
UrlEncodeUnicode : _
EscapeDataString : _
EscapeUriString : _
UrlPathEncode : _
IsControl : False
IsDigit : False
IsHighSurrogate : False
IsLegalUserName : True
IsLegalFileName : True
IsLetter : False
IsLetterOrDigit : False
IsLower : False
IsLowSurrogate : False
IsMark : False
IsNumber : False
IsPunctuation : True
IsSeparator : False
IsSurrogate : False
IsSymbol : False
IsUpper : False
IsWhiteSpace : False
IsWord : True
#>


#TODO: finish documenting params
[Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSReviewUnusedParameter','',
Justification='The values are captured in function bodies.')]
[CmdletBinding()][OutputType([Management.Automation.PSCustomObject])] Param(
# A specific Unicode block (or named range) of characters to return.
[Parameter(ParameterSetName='Block',Position=0)]
[ValidateSet('BasicMultilingualPlane','BMP','ASCII','0x2xxx','BasicLatin','Latin1Supplement',
'LatinExtendedA','LatinExtendedB','IPAExtensions','SpacingModifierLetters','CombiningDiacriticalMarks',
'GreekandCoptic','Cyrillic','CyrillicSupplement','Armenian','Hebrew','Arabic','Syriac','ArabicSupplement',
'Thaana','NKo','Samaritan','Mandaic','ArabicExtendedA','Devanagari','Bengali','Gurmukhi','Gujarati','Oriya',
'Tamil','Telugu','Kannada','Malayalam','Sinhala','Thai','Lao','Tibetan','Myanmar','Georgian','HangulJamo',
'Ethiopic','EthiopicSupplement','Cherokee','UnifiedCanadianAboriginalSyllabics','Ogham','Runic','Tagalog',
'Hanunoo','Buhid','Tagbanwa','Khmer','Mongolian','UnifiedCanadianAboriginalSyllabicsExtended','Limbu','TaiLe',
'NewTaiLue','KhmerSymbols','Buginese','TaiTham','CombiningDiacriticalMarksExtended','Balinese','Sundanese',
'Batak','Lepcha','OlChiki','SundaneseSupplement','VedicExtensions','PhoneticExtensions',
'PhoneticExtensionsSupplement','CombiningDiacriticalMarksSupplement','LatinExtendedAdditional','GreekExtended',
'GeneralPunctuation','SuperscriptsandSubscripts','CurrencySymbols','CombiningDiacriticalMarksforSymbols',
'LetterlikeSymbols','NumberForms','Arrows','MathematicalOperators','MiscellaneousTechnical','ControlPictures',
'OpticalCharacterRecognition','EnclosedAlphanumerics','BoxDrawing','BlockElements','GeometricShapes',
'MiscellaneousSymbols','Dingbats','MiscellaneousMathematicalSymbolsA','SupplementalArrowsA','BraillePatterns',
'SupplementalArrowsB','MiscellaneousMathematicalSymbolsB','SupplementalMathematicalOperators',
'MiscellaneousSymbolsandArrows','Glagolitic','LatinExtendedC','Coptic','GeorgianSupplement','Tifinagh',
'EthiopicExtended','CyrillicExtendedA','SupplementalPunctuation','CJKRadicalsSupplement','KangxiRadicals',
'IdeographicDescriptionCharacters','CJKSymbolsandPunctuation','Hiragana','Katakana','Bopomofo',
'HangulCompatibilityJamo','Kanbun','BopomofoExtended','CJKStrokes','KatakanaPhoneticExtensions',
'EnclosedCJKLettersandMonths','CJKCompatibility','CJKUnifiedIdeographsExtensionA','YijingHexagramSymbols',
'CJKUnifiedIdeographs','YiSyllables','YiRadicals','Lisu','Vai','CyrillicExtendedB','Bamum','ModifierToneLetters',
'LatinExtendedD','SylotiNagri','CommonIndicNumberForms','Phagspa','Saurashtra','DevanagariExtended','KayahLi',
'Rejang','HangulJamoExtendedA','Javanese','MyanmarExtendedB','Cham','MyanmarExtendedA','TaiViet',
'MeeteiMayekExtensions','EthiopicExtendedA','LatinExtendedE','MeeteiMayek','HangulSyllables',
'HangulJamoExtendedB','HighSurrogates','HighPrivateUseSurrogates','LowSurrogates','PrivateUseArea',
'CJKCompatibilityIdeographs','AlphabeticPresentationForms','ArabicPresentationFormsA','VariationSelectors',
'VerticalForms','CombiningHalfMarks','CJKCompatibilityForms','SmallFormVariants','ArabicPresentationFormsB',
'HalfwidthandFullwidthForms','Specials')][string]$Block = 'BasicMultilingualPlane',
# A string containing one or more characters to get details for.
[Parameter(ParameterSetName='Char',Mandatory=$true,ValueFromPipeline=$true)][string]$Char,
# A codepoint to get details for.
[Parameter(ParameterSetName='Value',Position=0,Mandatory=$true)][int][Alias('CodePoint')]$Value,
# The minimum character in the range to return.
[Parameter(ParameterSetName='Range',Position=0,Mandatory=$true)][int]$StartValue,
# The maximum character in the range to return.
[Parameter(ParameterSetName='Range',Position=1,Mandatory=$true)][int]$StopValue,
[switch]$IsControl,
[switch]$NotControl,
[switch]$IsDigit,
[switch]$NotDigit,
[switch]$IsHighSurrogate,
[switch]$NotHighSurrogate,
[switch]$IsLegalUserName,
[switch]$NotLegalUserName,
[switch]$IsLegalFileName,
[switch]$NotLegalFileName,
[switch]$IsLetter,
[switch]$NotLetter,
[switch]$IsLetterOrDigit,
[switch]$NotLetterOrDigit,
[switch]$IsLower,
[switch]$NotLower,
[switch]$IsLowSurrogate,
[switch]$NotLowSurrogate,
[switch]$IsMark,
[switch]$NotMark,
[switch]$IsNumber,
[switch]$NotNumber,
[switch]$IsPunctuation,
[switch]$NotPunctuation,
[switch]$IsSeparator,
[switch]$NotSeparator,
[switch]$IsSurrogate,
[switch]$NotSurrogate,
[switch]$IsSymbol,
[switch]$NotSymbol,
[switch]$IsUpper,
[switch]$NotUpper,
[switch]$IsWhiteSpace,
[switch]$NotWhiteSpace,
[switch]$IsWord,
[switch]$NotWord
)
Begin
{
    try{[void][Web.HttpUtility]}catch{Add-Type -AN System.Web}
    try{[void][PasswordCharacter]}catch{if($IsWindows){Add-Type -TypeDefinition @'
using System;
using System.Runtime.InteropServices;
public class PasswordCharacter
{
    [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
    private static extern int GetStringTypeW(int dwInfoType, string lpSrcStr, int cchSrc, out ushort lpCharType);
    public enum CharacterType { None = 0, Uppercase, Lowercase, Caseless, Digit, Special }
    static public CharacterType GetCharacterType(char charvalue)
    {
        ushort chartype = 0;
        int errcode = GetStringTypeW(1,charvalue.ToString(),1,out chartype);
        if(errcode == 87) {throw new ArgumentOutOfRangeException("Bad parameter.");}
        if(errcode == 1004) {throw new ArgumentOutOfRangeException("Bad flags.");}
        if((chartype & 1) != 0) {return CharacterType.Uppercase;}
        if((chartype & 2) != 0) {return CharacterType.Lowercase;}
        if((chartype & 0x100) != 0) {return CharacterType.Caseless;}
        if((chartype & 4) != 0) {return CharacterType.Digit;}
        return CharacterType.Special;
    }
}
'@
}}
    # Only some blocks are supported: https://msdn.microsoft.com/library/20bw873z.aspx#SupportedNamedBlocks
    function Find-UnicodeRangeBlock([int]$c)
    {
        if($c -le 0x007F) {'BasicLatin'}
        elseif($c -le 0x00FF) {'Latin-1Supplement'}
        elseif($c -le 0x017F) {'LatinExtended-A'}
        elseif($c -le 0x024F) {'LatinExtended-B'}
        elseif($c -le 0x02AF) {'IPAExtensions'}
        elseif($c -le 0x02FF) {'SpacingModifierLetters'}
        elseif($c -le 0x036F) {'CombiningDiacriticalMarks'}
        elseif($c -le 0x03FF) {'Greek'} # or GreekandCoptic
        elseif($c -le 0x04FF) {'Cyrillic'}
        elseif($c -le 0x052F) {'CyrillicSupplement'}
        elseif($c -le 0x058F) {'Armenian'}
        elseif($c -le 0x05FF) {'Hebrew'}
        elseif($c -le 0x06FF) {'Arabic'}
        elseif($c -le 0x074F) {'Syriac'}
        elseif($c -le 0x077F) {'ArabicSupplement'} # not supported
        elseif($c -le 0x07BF) {'Thaana'}
        elseif($c -le 0x07C0) {'NKo'} # not supported
        elseif($c -le 0x083F) {'Samaritan'} # not supported
        elseif($c -le 0x085F) {'Mandaic'} # not supported
        elseif($c -le 0x089F) {'Invalid'} # not supported
        elseif($c -le 0x08FF) {'ArabicExtended-A'} # not supported
        elseif($c -le 0x097F) {'Devanagari'}
        elseif($c -le 0x09FF) {'Bengali'}
        elseif($c -le 0x0A7F) {'Gurmukhi'}
        elseif($c -le 0x0AFF) {'Gujarati'}
        elseif($c -le 0x0B7F) {'Oriya'}
        elseif($c -le 0x0BFF) {'Tamil'}
        elseif($c -le 0x0C7F) {'Telugu'}
        elseif($c -le 0x0CFF) {'Kannada'}
        elseif($c -le 0x0D7F) {'Malayalam'}
        elseif($c -le 0x0DFF) {'Sinhala'}
        elseif($c -le 0x0E7F) {'Thai'}
        elseif($c -le 0x0EFF) {'Lao'}
        elseif($c -le 0x0FFF) {'Tibetan'}
        elseif($c -le 0x109F) {'Myanmar'}
        elseif($c -le 0x10FF) {'Georgian'}
        elseif($c -le 0x11FF) {'HangulJamo'}
        elseif($c -le 0x137F) {'Ethiopic'}
        elseif($c -le 0x139F) {'EthiopicSupplement'} # not supported
        elseif($c -le 0x13FF) {'Cherokee'}
        elseif($c -le 0x167F) {'UnifiedCanadianAboriginalSyllabics'}
        elseif($c -le 0x169F) {'Ogham'}
        elseif($c -le 0x16FF) {'Runic'}
        elseif($c -le 0x171F) {'Tagalog'}
        elseif($c -le 0x173F) {'Hanunoo'}
        elseif($c -le 0x175F) {'Buhid'}
        elseif($c -le 0x177F) {'Tagbanwa'}
        elseif($c -le 0x17FF) {'Khmer'}
        elseif($c -le 0x18AF) {'Mongolian'}
        elseif($c -le 0x18FF) {'UnifiedCanadianAboriginalSyllabicsExtended'} # not supported
        elseif($c -le 0x194F) {'Limbu'}
        elseif($c -le 0x197F) {'TaiLe'}
        elseif($c -le 0x19DF) {'NewTaiLue'} # not supported
        elseif($c -le 0x19FF) {'KhmerSymbols'}
        elseif($c -le 0x1A1F) {'Buginese'} # not supported
        elseif($c -le 0x1AAF) {'TaiTham'} # not supported
        elseif($c -le 0x1AFF) {'CombiningDiacriticalMarksExtended'} # not supported
        elseif($c -le 0x1B7F) {'Balinese'} # not supported
        elseif($c -le 0x1BBF) {'Sundanese'} # not supported
        elseif($c -le 0x1BFF) {'Batak'} # not supported
        elseif($c -le 0x1C4F) {'Lepcha'} # not supported
        elseif($c -le 0x1C7F) {'OlChiki'} # not supported
        elseif($c -le 0x1CCF) {'SundaneseSupplement'} # not supported
        elseif($c -le 0x1CFF) {'VedicExtensions'} # not supported
        elseif($c -le 0x1D7F) {'PhoneticExtensions'}
        elseif($c -le 0x1DBF) {'PhoneticExtensionsSupplement'} # not supported
        elseif($c -le 0x1DFF) {'CombiningDiacriticalMarksSupplement'} # not supported
        elseif($c -le 0x1EFF) {'LatinExtendedAdditional'}
        elseif($c -le 0x1FFF) {'GreekExtended'}
        elseif($c -le 0x206F) {'GeneralPunctuation'}
        elseif($c -le 0x209F) {'SuperscriptsandSubscripts'}
        elseif($c -le 0x20CF) {'CurrencySymbols'}
        elseif($c -le 0x20FF) {'CombiningMarksforSymbols'} # or CombiningDiacriticalMarksforSymbols
        elseif($c -le 0x214F) {'LetterlikeSymbols'}
        elseif($c -le 0x218F) {'NumberForms'}
        elseif($c -le 0x21FF) {'Arrows'}
        elseif($c -le 0x22FF) {'MathematicalOperators'}
        elseif($c -le 0x23FF) {'MiscellaneousTechnical'}
        elseif($c -le 0x243F) {'ControlPictures'}
        elseif($c -le 0x245F) {'OpticalCharacterRecognition'}
        elseif($c -le 0x24FF) {'EnclosedAlphanumerics'}
        elseif($c -le 0x257F) {'BoxDrawing'}
        elseif($c -le 0x259F) {'BlockElements'}
        elseif($c -le 0x25FF) {'GeometricShapes'}
        elseif($c -le 0x26FF) {'MiscellaneousSymbols'}
        elseif($c -le 0x27BF) {'Dingbats'}
        elseif($c -le 0x27EF) {'MiscellaneousMathematicalSymbols-A'}
        elseif($c -le 0x27FF) {'SupplementalArrows-A'}
        elseif($c -le 0x28FF) {'BraillePatterns'}
        elseif($c -le 0x297F) {'SupplementalArrows-B'}
        elseif($c -le 0x29FF) {'MiscellaneousMathematicalSymbols-B'}
        elseif($c -le 0x2AFF) {'SupplementalMathematicalOperators'}
        elseif($c -le 0x2BFF) {'MiscellaneousSymbolsandArrows'}
        elseif($c -le 0x2C5F) {'Glagolitic'} # not supported
        elseif($c -le 0x2C7F) {'LatinExtended-C'} # not supported
        elseif($c -le 0x2CFF) {'Coptic'} # not supported
        elseif($c -le 0x2D2F) {'GeorgianSupplement'} # not supported
        elseif($c -le 0x2D7F) {'Tifinagh'} # not supported
        elseif($c -le 0x2DDF) {'EthiopicExtended'} # not supported
        elseif($c -le 0x2DFF) {'CyrillicExtended-A'} # not supported
        elseif($c -le 0x2E7F) {'SupplementalPunctuation'} # not supported
        elseif($c -le 0x2EFF) {'CJKRadicalsSupplement'}
        elseif($c -le 0x2FEF) {'Invalid'} # not supported
        elseif($c -le 0x2FDF) {'KangxiRadicals'}
        elseif($c -le 0x2FFF) {'IdeographicDescriptionCharacters'}
        elseif($c -le 0x303F) {'CJKSymbolsandPunctuation'}
        elseif($c -le 0x309F) {'Hiragana'}
        elseif($c -le 0x30FF) {'Katakana'}
        elseif($c -le 0x312F) {'Bopomofo'}
        elseif($c -le 0x318F) {'HangulCompatibilityJamo'}
        elseif($c -le 0x319F) {'Kanbun'}
        elseif($c -le 0x31BF) {'BopomofoExtended'}
        elseif($c -le 0x31EF) {'CJKStrokes'} # not supported
        elseif($c -le 0x31FF) {'KatakanaPhoneticExtensions'}
        elseif($c -le 0x32FF) {'EnclosedCJKLettersandMonths'}
        elseif($c -le 0x33FF) {'CJKCompatibility'}
        elseif($c -le 0x4DBF) {'CJKUnifiedIdeographsExtensionA'}
        elseif($c -le 0x4DFF) {'YijingHexagramSymbols'}
        elseif($c -le 0x9FFF) {'CJKUnifiedIdeographs'}
        elseif($c -le 0xA48F) {'YiSyllables'}
        elseif($c -le 0xA4CF) {'YiRadicals'}
        elseif($c -le 0xA4FF) {'Lisu'} # not supported
        elseif($c -le 0xA63F) {'Vai'} # not supported
        elseif($c -le 0xA69F) {'CyrillicExtended-B'} # not supported
        elseif($c -le 0xA6FF) {'Bamum'} # not supported
        elseif($c -le 0xA71F) {'ModifierToneLetters'} # not supported
        elseif($c -le 0xA7FF) {'LatinExtended-D'} # not supported
        elseif($c -le 0xA82F) {'SylotiNagri'} # not supported
        elseif($c -le 0xA83F) {'CommonIndicNumberForms'} # not supported
        elseif($c -le 0xA87F) {'Phags-pa'} # not supported
        elseif($c -le 0xA8DF) {'Saurashtra'} # not supported
        elseif($c -le 0xA8FF) {'DevanagariExtended'} # not supported
        elseif($c -le 0xA92F) {'KayahLi'} # not supported
        elseif($c -le 0xA95F) {'Rejang'} # not supported
        elseif($c -le 0xA97F) {'HangulJamoExtended-A'} # not supported
        elseif($c -le 0xA9DF) {'Javanese'} # not supported
        elseif($c -le 0xA9FF) {'MyanmarExtended-B'} # not supported
        elseif($c -le 0xAA5F) {'Cham'} # not supported
        elseif($c -le 0xAA7F) {'MyanmarExtended-A'} # not supported
        elseif($c -le 0xAADF) {'TaiViet'} # not supported
        elseif($c -le 0xAAFF) {'MeeteiMayekExtensions'} # not supported
        elseif($c -le 0xAB2F) {'EthiopicExtended-A'} # not supported
        elseif($c -le 0xAB6F) {'LatinExtended-E'} # not supported
        elseif($c -le 0xABFF) {'MeeteiMayek'} # not supported
        elseif($c -le 0xD7AF) {'HangulSyllables'}
        elseif($c -le 0xD7FF) {'HangulJamoExtended-B'} # not supported
        elseif($c -le 0xDB7F) {'HighSurrogates'}
        elseif($c -le 0xDBFF) {'HighPrivateUseSurrogates'}
        elseif($c -le 0xDFFF) {'LowSurrogates'}
        elseif($c -le 0xF8FF) {'PrivateUse'} # or PrivateUseArea
        elseif($c -le 0xFAFF) {'CJKCompatibilityIdeographs'}
        elseif($c -le 0xFB4F) {'AlphabeticPresentationForms'}
        elseif($c -le 0xFDFF) {'ArabicPresentationForms-A'}
        elseif($c -le 0xFE0F) {'VariationSelectors'}
        elseif($c -le 0xFE1F) {'VerticalForms'} # not supported
        elseif($c -le 0xFE2F) {'CombiningHalfMarks'}
        elseif($c -le 0xFE4F) {'CJKCompatibilityForms'}
        elseif($c -le 0xFE6F) {'SmallFormVariants'}
        elseif($c -le 0xFEFF) {'ArabicPresentationForms-B'}
        elseif($c -le 0xFFEF) {'HalfwidthandFullwidthForms'}
        elseif($c -le 0xFFFF) {'Specials'}
        else {'Impossible'} #TODO: Astral Plane
    }
    function Convert-UnicodeBlockToRange($b)
    {
        switch($Block)
        {
            BasicMultilingualPlane {[int][char]::MinValue,[int][char]::MaxValue}
            BMP {[int][char]::MinValue,[int][char]::MaxValue}
            ASCII {0x0000,0x007F}
            0x2xxx {0x2000,0x2FFF}
            BasicLatin {0x0000,0x007F}
            Latin1Supplement {0x0080,0x00FF}
            LatinExtendedA {0x0100,0x017F}
            LatinExtendedB {0x0180,0x024F}
            IPAExtensions {0x0250,0x02AF}
            SpacingModifierLetters {0x02B0,0x02FF}
            CombiningDiacriticalMarks {0x0300,0x036F}
            GreekandCoptic {0x0370,0x03FF}
            Cyrillic {0x0400,0x04FF}
            CyrillicSupplement {0x0500,0x052F}
            Armenian {0x0530,0x058F}
            Hebrew {0x0590,0x05FF}
            Arabic {0x0600,0x06FF}
            Syriac {0x0700,0x074F}
            ArabicSupplement {0x0750,0x077F}
            Thaana {0x0780,0x07BF}
            NKo {0x07C0,0x07FF}
            Samaritan {0x0800,0x083F}
            Mandaic {0x0840,0x085F}
            ArabicExtendedA {0x08A0,0x08FF}
            Devanagari {0x0900,0x097F}
            Bengali {0x0980,0x09FF}
            Gurmukhi {0x0A00,0x0A7F}
            Gujarati {0x0A80,0x0AFF}
            Oriya {0x0B00,0x0B7F}
            Tamil {0x0B80,0x0BFF}
            Telugu {0x0C00,0x0C7F}
            Kannada {0x0C80,0x0CFF}
            Malayalam {0x0D00,0x0D7F}
            Sinhala {0x0D80,0x0DFF}
            Thai {0x0E00,0x0E7F}
            Lao {0x0E80,0x0EFF}
            Tibetan {0x0F00,0x0FFF}
            Myanmar {0x1000,0x109F}
            Georgian {0x10A0,0x10FF}
            HangulJamo {0x1100,0x11FF}
            Ethiopic {0x1200,0x137F}
            EthiopicSupplement {0x1380,0x139F}
            Cherokee {0x13A0,0x13FF}
            UnifiedCanadianAboriginalSyllabics {0x1400,0x167F}
            Ogham {0x1680,0x169F}
            Runic {0x16A0,0x16FF}
            Tagalog {0x1700,0x171F}
            Hanunoo {0x1720,0x173F}
            Buhid {0x1740,0x175F}
            Tagbanwa {0x1760,0x177F}
            Khmer {0x1780,0x17FF}
            Mongolian {0x1800,0x18AF}
            UnifiedCanadianAboriginalSyllabicsExtended {0x18B0,0x18FF}
            Limbu {0x1900,0x194F}
            TaiLe {0x1950,0x197F}
            NewTaiLue {0x1980,0x19DF}
            KhmerSymbols {0x19E0,0x19FF}
            Buginese {0x1A00,0x1A1F}
            TaiTham {0x1A20,0x1AAF}
            CombiningDiacriticalMarksExtended {0x1AB0,0x1AFF}
            Balinese {0x1B00,0x1B7F}
            Sundanese {0x1B80,0x1BBF}
            Batak {0x1BC0,0x1BFF}
            Lepcha {0x1C00,0x1C4F}
            OlChiki {0x1C50,0x1C7F}
            SundaneseSupplement {0x1CC0,0x1CCF}
            VedicExtensions {0x1CD0,0x1CFF}
            PhoneticExtensions {0x1D00,0x1D7F}
            PhoneticExtensionsSupplement {0x1D80,0x1DBF}
            CombiningDiacriticalMarksSupplement {0x1DC0,0x1DFF}
            LatinExtendedAdditional {0x1E00,0x1EFF}
            GreekExtended {0x1F00,0x1FFF}
            GeneralPunctuation {0x2000,0x206F}
            SuperscriptsandSubscripts {0x2070,0x209F}
            CurrencySymbols {0x20A0,0x20CF}
            CombiningDiacriticalMarksforSymbols {0x20D0,0x20FF}
            LetterlikeSymbols {0x2100,0x214F}
            NumberForms {0x2150,0x218F}
            Arrows {0x2190,0x21FF}
            MathematicalOperators {0x2200,0x22FF}
            MiscellaneousTechnical {0x2300,0x23FF}
            ControlPictures {0x2400,0x243F}
            OpticalCharacterRecognition {0x2440,0x245F}
            EnclosedAlphanumerics {0x2460,0x24FF}
            BoxDrawing {0x2500,0x257F}
            BlockElements {0x2580,0x259F}
            GeometricShapes {0x25A0,0x25FF}
            MiscellaneousSymbols {0x2600,0x26FF}
            Dingbats {0x2700,0x27BF}
            MiscellaneousMathematicalSymbolsA {0x27C0,0x27EF}
            SupplementalArrowsA {0x27F0,0x27FF}
            BraillePatterns {0x2800,0x28FF}
            SupplementalArrowsB {0x2900,0x297F}
            MiscellaneousMathematicalSymbolsB {0x2980,0x29FF}
            SupplementalMathematicalOperators {0x2A00,0x2AFF}
            MiscellaneousSymbolsandArrows {0x2B00,0x2BFF}
            Glagolitic {0x2C00,0x2C5F}
            LatinExtendedC {0x2C60,0x2C7F}
            Coptic {0x2C80,0x2CFF}
            GeorgianSupplement {0x2D00,0x2D2F}
            Tifinagh {0x2D30,0x2D7F}
            EthiopicExtended {0x2D80,0x2DDF}
            CyrillicExtendedA {0x2DE0,0x2DFF}
            SupplementalPunctuation {0x2E00,0x2E7F}
            CJKRadicalsSupplement {0x2E80,0x2EFF}
            KangxiRadicals {0x2F00,0x2FDF}
            IdeographicDescriptionCharacters {0x2FF0,0x2FFF}
            CJKSymbolsandPunctuation {0x3000,0x303F}
            Hiragana {0x3040,0x309F}
            Katakana {0x30A0,0x30FF}
            Bopomofo {0x3100,0x312F}
            HangulCompatibilityJamo {0x3130,0x318F}
            Kanbun {0x3190,0x319F}
            BopomofoExtended {0x31A0,0x31BF}
            CJKStrokes {0x31C0,0x31EF}
            KatakanaPhoneticExtensions {0x31F0,0x31FF}
            EnclosedCJKLettersandMonths {0x3200,0x32FF}
            CJKCompatibility {0x3300,0x33FF}
            CJKUnifiedIdeographsExtensionA {0x3400,0x4DBF}
            YijingHexagramSymbols {0x4DC0,0x4DFF}
            CJKUnifiedIdeographs {0x4E00,0x9FFF}
            YiSyllables {0xA000,0xA48F}
            YiRadicals {0xA490,0xA4CF}
            Lisu {0xA4D0,0xA4FF}
            Vai {0xA500,0xA63F}
            CyrillicExtendedB {0xA640,0xA69F}
            Bamum {0xA6A0,0xA6FF}
            ModifierToneLetters {0xA700,0xA71F}
            LatinExtendedD {0xA720,0xA7FF}
            SylotiNagri {0xA800,0xA82F}
            CommonIndicNumberForms {0xA830,0xA83F}
            Phagspa {0xA840,0xA87F}
            Saurashtra {0xA880,0xA8DF}
            DevanagariExtended {0xA8E0,0xA8FF}
            KayahLi {0xA900,0xA92F}
            Rejang {0xA930,0xA95F}
            HangulJamoExtendedA {0xA960,0xA97F}
            Javanese {0xA980,0xA9DF}
            MyanmarExtendedB {0xA9E0,0xA9FF}
            Cham {0xAA00,0xAA5F}
            MyanmarExtendedA {0xAA60,0xAA7F}
            TaiViet {0xAA80,0xAADF}
            MeeteiMayekExtensions {0xAAE0,0xAAFF}
            EthiopicExtendedA {0xAB00,0xAB2F}
            LatinExtendedE {0xAB30,0xAB6F}
            MeeteiMayek {0xABC0,0xABFF}
            HangulSyllables {0xAC00,0xD7AF}
            HangulJamoExtendedB {0xD7B0,0xD7FF}
            HighSurrogates {0xD800,0xDB7F}
            HighPrivateUseSurrogates {0xDB80,0xDBFF}
            LowSurrogates {0xDC00,0xDFFF}
            PrivateUseArea {0xE000,0xF8FF}
            CJKCompatibilityIdeographs {0xF900,0xFAFF}
            AlphabeticPresentationForms {0xFB00,0xFB4F}
            ArabicPresentationFormsA {0xFB50,0xFDFF}
            VariationSelectors {0xFE00,0xFE0F}
            VerticalForms {0xFE10,0xFE1F}
            CombiningHalfMarks {0xFE20,0xFE2F}
            CJKCompatibilityForms {0xFE30,0xFE4F}
            SmallFormVariants {0xFE50,0xFE6F}
            ArabicPresentationFormsB {0xFE70,0xFEFF}
            HalfwidthandFullwidthForms {0xFF00,0xFFEF}
            Specials {0xFFF0,0xFFFF}
            default {[int][char]::MinValue,[int][char]::MaxValue}
        }
    }
    function Find-UnicodeCategoryClass([char]$c)
    {
        @('Lu','Ll','Lt','Lm','Lo','L','Mn','Mc','Me','M','Nd','Nl','No','N','Pc','Pd','Ps','Pe','Pi',
            'Pf','Po','P','Sm','Sc','Sk','So','S','Zs','Zl','Zp','Z','Cc','Cf','Cs','Co','Cn','C') |
            Where-Object {$c -cmatch "\p{$_}"}
    }
    $invalidUserNameChars = '"/\[]:;|=,+*?<>'.ToCharArray() # https://technet.microsoft.com/en-us/library/bb726984.aspx
    $invalidFileNameChars = [IO.Path]::GetInvalidFileNameChars()  # https://msdn.microsoft.com/library/system.io.path.getinvalidfilenamechars.aspx
    $notablock = @'
ArabicSupplement
NKo
Samaritan
Mandaic
Invalid
ArabicExtended-A
EthiopicSupplement
UnifiedCanadianAboriginalSyllabicsExtended
NewTaiLue
Buginese
TaiTham
CombiningDiacriticalMarksExtended
Balinese
Sundanese
Batak
Lepcha
OlChiki
SundaneseSupplement
VedicExtensions
PhoneticExtensionsSupplement
CombiningDiacriticalMarksSupplement
Glagolitic
LatinExtended-C
Coptic
GeorgianSupplement
Tifinagh
EthiopicExtended
CyrillicExtended-A
SupplementalPunctuation
CJKStrokes
Lisu
Vai
CyrillicExtended-B
Bamum
ModifierToneLetters
LatinExtended-D
SylotiNagri
CommonIndicNumberForms
Phags-pa
Saurashtra
DevanagariExtended
KayahLi
Rejang
HangulJamoExtended-A
Javanese
MyanmarExtended-B
Cham
MyanmarExtended-A
TaiViet
MeeteiMayekExtensions
EthiopicExtended-A
LatinExtended-E
MeeteiMayek
HangulJamoExtended-B
VerticalForms
'@
 -split '\s+'
    function Get-CharacterDetail([char]$c)
    {
        $properties = [ordered]@{
            Character           = $c
            Value               = [int]$c
            CodePoint           = 'U+{0:X4}' -f [int]$c
            UnicodeName         = Get-UnicodeName ([int]$c)
            UnicodeBlock        = ''
            MatchesBlock        = ''
            UnicodeCategory     = [char]::GetUnicodeCategory($c)
            CategoryClasses     = Find-UnicodeCategoryClass($c)
            PasswordCategory    = $IsWindows ? [PasswordCharacter]::GetCharacterType($c) : $null
            XmlEscape           = [Security.SecurityElement]::Escape($c)
            HtmlAttributeEncode = [Web.HttpUtility]::HtmlAttributeEncode($c)
            UrlEncode           = [Net.WebUtility]::UrlEncode($c)
            HttpUrlEncode       = [Web.HttpUtility]::UrlEncode("$c")
            UrlEncodeUnicode    = [Web.HttpUtility]::UrlEncodeUnicode($c)
            EscapeDataString    = $(try{[uri]::EscapeDataString($c)}catch{$_.Exception.GetType().Name})
            EscapeUriString     = $(try{[uri]::EscapeUriString($c)}catch{$_.Exception.GetType().Name})
            UrlPathEncode       = [Web.HttpUtility]::UrlPathEncode($c)
            IsControl           = [char]::IsControl($c)
            IsDigit             = [char]::IsDigit($c)
            IsHighSurrogate     = [char]::IsHighSurrogate($c)
            IsLegalUserName     = $invalidUserNameChars -notcontains [char]$c
            IsLegalFileName     = $invalidFileNameChars -notcontains [char]$c
            IsLetter            = [char]::IsLetter($c)
            IsLetterOrDigit     = [char]::IsLetterOrDigit($c)
            IsLower             = [char]::IsLower($c)
            IsLowSurrogate      = [char]::IsLowSurrogate($c)
            IsMark              = $c -match '\p{M}'
            IsNumber            = [char]::IsNumber($c)
            IsPunctuation       = [char]::IsPunctuation($c)
            IsSeparator         = [char]::IsSeparator($c)
            IsSurrogate         = [char]::IsSurrogate($c)
            IsSymbol            = [char]::IsSymbol($c)
            IsUpper             = [char]::IsUpper($c)
            IsWhiteSpace        = [char]::IsWhiteSpace($c)
            IsWord              = $c -match '\w'
        }
        if( ($IsControl -and !$properties.IsControl) -or
            ($NotControl -and $properties.IsControl) -or
            ($IsDigit -and !$properties.IsDigit) -or
            ($NotDigit -and $properties.IsDigit) -or
            ($IsHighSurrogate -and !$properties.IsHighSurrogate) -or
            ($NotHighSurrogate -and $properties.IsHighSurrogate) -or
            ($IsLegalUserName -and !$properties.IsLegalUserName) -or
            ($NotLegalUserName -and $properties.IsLegalUserName) -or
            ($IsLegalFileName -and !$properties.IsLegalFileName) -or
            ($NotLegalFileName -and $properties.IsLegalFileName) -or
            ($IsLetter -and !$properties.IsLetter) -or
            ($NotLetter -and $properties.IsLetter) -or
            ($IsLetterOrDigit -and !$properties.IsLetterOrDigit) -or
            ($NotLetterOrDigit -and $properties.IsLetterOrDigit) -or
            ($IsLower -and !$properties.IsLower) -or
            ($NotLower -and $properties.IsLower) -or
            ($IsLowSurrogate -and !$properties.IsLowSurrogate) -or
            ($NotLowSurrogate -and $properties.IsLowSurrogate) -or
            ($IsMark -and !$properties.IsMark) -or
            ($NotMark -and $properties.IsMark) -or
            ($IsNumber -and !$properties.IsNumber) -or
            ($NotNumber -and $properties.IsNumber) -or
            ($IsPunctuation -and !$properties.IsPunctuation) -or
            ($NotPunctuation -and $properties.IsPunctuation) -or
            ($IsSeparator -and !$properties.IsSeparator) -or
            ($NotSeparator -and $properties.IsSeparator) -or
            ($IsSurrogate -and !$properties.IsSurrogate) -or
            ($NotSurrogate -and $properties.IsSurrogate) -or
            ($IsSymbol -and !$properties.IsSymbol) -or
            ($NotSymbol -and $properties.IsSymbol) -or
            ($IsUpper -and !$properties.IsUpper) -or
            ($NotUpper -and $properties.IsUpper) -or
            ($IsWhiteSpace -and !$properties.IsWhiteSpace) -or
            ($NotWhiteSpace -and $properties.IsWhiteSpace) -or
            ($IsWord -and !$properties.IsWord) -or
            ($NotWord -and $properties.IsWord) )
                {return}
        $b = Find-UnicodeRangeBlock $c
        $properties.UnicodeBlock = $b
        $properties.MatchesBlock = if($notablock -contains $b) {'Error'} else {$c -match "\p{Is$b}"}
        New-Object PSObject -Property $properties
    }
    function Get-CharactersDetail([Parameter(ValueFromPipeline=$true)][string]$Chars)
    {
        foreach($c in $Chars.GetEnumerator()) {Get-CharacterDetail $c}
    }
    function Get-CharacterRangeDetail([int]$start,[int]$stop)
    {
        $i,$max = 0,(($stop - $start)/100)
        $start..$stop |ForEach-Object {
            [char]$c = $_
            Get-CharacterDetail $c
            Write-Progress 'Gathering Character Details' -CurrentOperation ('Character: U+{0:X4} {1}' -f $_,$c) -PercentComplete ($i++/$max) -EA SilentlyContinue
        }
        Write-Progress 'Gathering Character Details' -Completed
    }
}
Process
{
    switch($PSCmdlet.ParameterSetName)
    {
        Block { $start,$stop = Convert-UnicodeBlockToRange $Block; Get-CharacterRangeDetail $start $stop }
        Char  { $Char |Get-CharactersDetail }
        Range { Get-CharacterRangeDetail $StartValue $StopValue }
        Value
        {
            if($Value -gt [char]::MaxValue) { [char]::ConvertFromUtf32($Value) |Get-CharactersDetail }
            else { Get-CharacterDetail $Value }
        }
    }
}

}

function Get-Unicode
{
<#
.SYNOPSIS
Returns the (UTF-16) .NET string for a given Unicode codepoint, which may be a surrogate pair.
 
.NOTES
An alias of U+ allows you to interpolate a codepoint like this "$(U+ 0x1F5A7) Network"
 
This script is mostly useful to Windows PowerShell (before version 6), since PowerShell Core
supports the new `u{1F5A5} syntax.
 
.INPUTS
System.Int32 value of a Unicode codepoint.
 
.OUTPUTS
System.String of Unicode character(s) identified by codepoints.
 
.FUNCTIONALITY
Unicode
 
.LINK
https://docs.microsoft.com/dotnet/api/system.char.convertfromutf32
 
.LINK
https://docs.microsoft.com/powershell/module/microsoft.powershell.core/about/about_special_characters#unicode-character-ux
 
.LINK
https://emojipedia.org/variation-selector-16/
 
.EXAMPLE
"$(Get-Unicode 0x1F5A7) Network"
 
<three networked computers> Network
#>


[CmdletBinding()][OutputType([string])] Param(
# The integer value of a Unicode codepoint to convert into a .NET string.
[Parameter(Position=0,Mandatory=$true,ValueFromPipeline=$true)][int] $Codepoint,
<#
Appends a U+FE0F VARIATION SELECTOR-16 suffix to the character, which suggests an emoji presentation
for characters that support both a simple text presentation as well as a color emoji-style one.
#>

[switch] $AsEmoji,
<#
Appends a U+FE0E VARIATION SELECTOR-15 suffix to the character, which suggests a non-emoji text
presentation for characters that support both a simple text presentation as well as a color
emoji-style one.
#>

[Alias('NotEmoji','AsPlainText')][switch] $AsText,
# Outputs the codepoint as a usable PowerShell string literal.
[switch] $AsStringLiteral
)
Begin { [char[]] $c = @() }
Process
{
    [char]::ConvertFromUtf32($Codepoint).GetEnumerator() |ForEach-Object {$c += $_}
    if($AsEmoji) {$c += 0xFE0F}
    elseif($AsText) {$c += 0xFE0E}
}
End
{
    $s = New-Object string $c,0,$c.Length
    if(!$AsStringLiteral) {$s}
    else {$Local:OFS='';"`"$($s.GetEnumerator() |ForEach-Object {'$([char]0x{0:X4})' -f [int]$_})`""}
}

}

function Get-UnicodeByName
{
<#
.SYNOPSIS
Returns characters based on Unicode code point name, GitHub short code, or HTML entity.
 
.INPUTS
System.String of a character name.
 
.OUTPUTS
System.String of the character(s) referenced by name.
 
.FUNCTIONALITY
Unicode
 
.LINK
https://www.unicode.org/Public/UCD/latest/ucd/NameAliases.txt
 
.LINK
https://html.spec.whatwg.org/multipage/named-characters.html
 
.EXAMPLE
Get-UnicodeByName hyphen-minus
 
-
 
.EXAMPLE
Get-UnicodeByName slash
 
/
 
.EXAMPLE
Get-UnicodeByName :zero:
 
[0]
 
.EXAMPLE
Get-UnicodeByName '&amp;'
 
&
 
.EXAMPLE
Get-UnicodeByName BEL
 
(beeps)
#>


[CmdletBinding()][OutputType([string])] Param(
# The name or alias of a Unicode character.
[Parameter(ParameterSetName='Name',Position=0,Mandatory=$true,ValueFromPipeline=$true)][string] $Name,
<#
Appends a U+FE0F VARIATION SELECTOR-16 suffix to the character, which suggests an emoji presentation
for characters that support both a simple text presentation as well as a color emoji-style one.
#>

[switch] $AsEmoji,
# Update the character name database.
[Parameter(ParameterSetName='Update')][switch] $Update
)
Begin
{
    $basename = Join-Path $PSScriptRoot data UnicodeByName
    $cc = ConvertFrom-StringData (Get-Content "$basename.cc.txt" -Raw)
    $codepoint = ConvertFrom-StringData (Get-Content "$basename.txt" -Raw)
    $html = Get-Content "$basename.html.json" -Raw |ConvertFrom-Json -AsHashtable
    $github = ConvertFrom-StringData (Get-Content "$basename.github.txt" -Raw)
    filter ConvertTo-Char([Parameter(ValueFromPipeline)][string] $Value)
    {
        $result = (($Value -split '\W+') |
            ForEach-Object {[char]::ConvertFromUtf32([convert]::ToInt32($_,16))}) -join ''
        return $AsEmoji ? $result + ([char]0xFE0F) : $result
    }
}
Process
{
    if($Update)
    {
        $conflictingOldNames = '0007','01B7','0292','0404','0406','0454','0456','10D0','10D1','10D2','10D3','10D4',
            '10D5','10D6','10D7','10D8','10D9','10DA','10DB','10DC','10DD','10DE','10DF','10E0','10E1','10E2','10E3',
            '10E4','10E5','10E6','10E7','10E8','10E9','10EA','10EB','10EC','10ED','10EE','10EF','10F0','10F1','10F2',
            '10F3','10F4','10F5','2016','314A','314B','314D','3209','320A','320C','3269','326A','326C','33B7','FFBA',
            'FFBB','FFBD'
        Get-UnicodeData |
            ForEach-Object {
                if($_.OldName -and $_.Value -notin $conflictingOldNames){$_.OldName+'='+$_.Value}
                if($_.Name -ne '<control>'){$_.Name+'='+$_.Value}
            } |Out-File "$basename.txt" -Encoding utf8
        Invoke-WebRequest https://html.spec.whatwg.org/entities.json -OutFile "$basename.html.json"
        (Invoke-RestMethod https://api.github.com/emojis).PSObject.Properties |
            Where-Object {$_.Value -notlike "*/$($_.Name).png[?]v8"} |
            ForEach-Object {':'+$_.Name+':='+(((([uri]$_.Value).Segments[-1]) -replace '\.png\z').ToUpper() -replace '-',',')} |
            Out-File "$basename.github.txt" -Encoding utf8
        Write-Information 'Updated.'
        return
    }
    else
    {
        if($cc.ContainsKey($Name)) {return $cc[$Name] |ConvertTo-Char}
        elseif($github.ContainsKey($Name)) {return $github[$Name] |ConvertTo-Char}
        elseif($html.ContainsKey($Name)) {return ($html[$Name].characters -join '') + ($AsEmoji ? [char]0xFE0F : '')}
        else {return $codepoint[$Name] |ConvertTo-Char}
    }
}

}

function Get-UnicodeName
{
<#
.SYNOPSIS
Returns the name of a Unicode code point.
 
.INPUTS
System.Int32 of a Unicode code point value to name, or
System.String of Unicode characters to name.
 
.OUTPUTS
System.String of the Unicode code point name.
 
.FUNCTIONALITY
Unicode
 
.LINK
https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 
.EXAMPLE
Get-UnicodeName 32
 
SPACE
#>


[CmdletBinding()][OutputType([string])] Param(
# The numeric value of the Unicode character.
[Parameter(ParameterSetName='CodePoint',Position=0,Mandatory=$true,ValueFromPipeline=$true)][int] $CodePoint,
# The Unicode character.
[Parameter(ParameterSetName='Character',Position=0,Mandatory=$true,ValueFromPipeline=$true)][string] $Character,
# Update the character name database.
[Parameter(ParameterSetName='Update')][switch] $Update
)
Begin
{
    $basename = Join-Path -Path $PSScriptRoot -ChildPath data -AdditionalChildPath UnicodeName
    $cc = ConvertFrom-StringData (Get-Content "$basename.cc.txt" -Raw)
    $name = ConvertFrom-StringData (Get-Content "$basename.txt" -Raw)
}
Process
{
    switch($PSCmdlet.ParameterSetName)
    {
        Update
        {
            Get-UnicodeData |
                Select-Object Value,@{n='Name';e={
                    $hex = '{0:X4}' -f $_.Value
                    $cc.ContainsKey($hex) ? $cc[$hex] : $_.Name
                }} |
                Export-Csv "$basename.txt" -Delimiter '=' -UseQuotes AsNeeded
            Write-Information 'Updated.'
            return
        }
        Character
        {
            return $Character.GetEnumerator() |ForEach-Object {[int]$_} |Get-UnicodeName
        }
        default
        {
            $hex = '{0:X4}' -f $CodePoint
            return $cc.ContainsKey($hex) ? $cc[$hex] : $name[$hex]
        }
    }
}

}

function Import-CharConstants
{
<#
.SYNOPSIS
Imports characters by name as constants into the current scope.
 
.INPUTS
System.String containing a character name.
 
.FUNCTIONALITY
Unicode
 
.LINK
Get-UnicodeByName
 
.EXAMPLE
Import-CharConstants NL :UP: HYPHEN-MINUS 'EN DASH' '&mdash;' '&copy;' -Scope Script
 
Creates constants in the context of the current script for the named characters.
#>


[CmdletBinding()] Param(
# The control code abbreviation, Unicode name, HTML entity, or GitHub name of the character to create a constant for.
# "NL" will use the newline appropriate to the environment.
[Parameter(ParameterSetName='UseNames',Position=0,Mandatory=$true,ValueFromPipeline=$true,ValueFromRemainingArguments=$true)][string[]] $CharacterName,
# A dictionary that maps character variable name aliases to control code abbreviations, Unicode names, HTML entities,
# or GitHub names of characters.
[Parameter(ParameterSetName='UseAliases',Mandatory=$true)][hashtable] $Alias,
# The scope of the constant.
[string] $Scope = 'Local',
<#
Appends a U+FE0F VARIATION SELECTOR-16 suffix to the character, which suggests an emoji presentation
for characters that support both a simple text presentation as well as a color emoji-style one.
#>

[switch] $AsEmoji
)
Begin
{
    $level = $Scope |Add-ScopeLevel

    filter Add-CharacterConstant
    {
        [CmdletBinding()] Param(
        [Parameter(Mandatory=$true,ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)][Alias('Key')][string] $Alias,
        [Parameter(Mandatory=$true,ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)][Alias('Value')][string] $CharacterName
        )
        $name = $Alias.Trim(':')
        $char = $CharacterName -eq 'NL' ? [Environment]::NewLine : (Get-UnicodeByName -Name $CharacterName -AsEmoji:$AsEmoji)
        $existing = Get-Variable -Name $name -Scope $level -ErrorAction Ignore
        if($existing -and ($existing.Options -eq 'Constant') -and ($existing.Value -eq $char)) {return}
        Set-Variable -Name $name -Value $char -Scope $level -Option Constant -Description $CharacterName
    }
}
Process
{
    switch($PSCmdlet.ParameterSetName)
    {
        UseNames {$CharacterName |Add-CharacterConstant}
        UseAliases {$Alias.GetEnumerator() |Add-CharacterConstant}
    }
}

}
Export-ModuleMember -Function Get-CharacterDetails,Get-Unicode,Get-UnicodeByName,Get-UnicodeName,Import-CharConstants