Public/Measure-DisplayWidth.ps1
|
function Measure-DisplayWidth { <# .SYNOPSIS Measures the display width of a string in terminal cells. .DESCRIPTION Calculates how many terminal cells a string will occupy when displayed. This accounts for: - Wide characters (CJK, emoji) that take 2 cells - Zero-width characters (combining marks) that take 0 cells - East Asian Ambiguous Width characters (configurable) - Regular ASCII and Latin characters that take 1 cell This is critical for proper alignment when using Unicode characters in terminal output, as String.Length counts UTF-16 code units, not display width. .PARAMETER Text The text string to measure. .PARAMETER AmbiguousAsWide Treat East Asian Ambiguous Width characters as 2 cells instead of 1. Default is narrow (1 cell) for maximum cross-platform compatibility. Ambiguous characters include box-drawing (╔═╗║), some symbols (®×○), and punctuation. Enable this if targeting East Asian locales or terminals configured for wide ambiguous chars. .EXAMPLE Measure-DisplayWidth "Hello" Returns: 5 (5 ASCII characters = 5 cells) .EXAMPLE Measure-DisplayWidth "Hello 世界" Returns: 10 (5 ASCII + 1 space + 2 CJK characters × 2 cells each) .EXAMPLE Measure-DisplayWidth "😀👍" Returns: 4 (2 emoji × 2 cells each) .EXAMPLE Measure-DisplayWidth "╔═══╗" -AmbiguousAsWide Returns: 10 (5 box-drawing characters × 2 cells with -AmbiguousAsWide) .EXAMPLE Measure-DisplayWidth "╔═══╗" Returns: 5 (5 box-drawing characters × 1 cell, default narrow treatment) .NOTES Author: MarkusMcNugen License: MIT Requires: PowerShell 5.1 or later This function is cross-platform compatible (Windows, Linux, macOS) and uses built-in .NET classes with no external dependencies required. Box-drawing characters are treated as narrow (1 cell) by default for maximum compatibility, as this matches behavior on 90% of terminals. Use -AmbiguousAsWide if you need wide treatment for East Asian environments. .LINK https://github.com/MarkusMcNugen/PSWriteColorEX #> [CmdletBinding()] [Alias('MDW', 'Get-DisplayWidth')] [OutputType([int])] param( [Parameter(Mandatory, Position = 0, ValueFromPipeline)] [AllowEmptyString()] [string]$Text, [Parameter()] [switch]$AmbiguousAsWide ) begin { # Helper function to determine character width function Get-CharacterWidth { param( [int]$CodePoint, [bool]$TreatAmbiguousAsWide ) # Zero-width characters if (($CodePoint -ge 0x0300 -and $CodePoint -le 0x036F) -or # Combining Diacritical Marks ($CodePoint -ge 0x0483 -and $CodePoint -le 0x0489) -or # Combining Cyrillic ($CodePoint -ge 0x0591 -and $CodePoint -le 0x05BD) -or # Hebrew combining ($CodePoint -ge 0x0600 -and $CodePoint -le 0x0605) -or # Arabic combining ($CodePoint -ge 0x064B -and $CodePoint -le 0x065F) -or # Arabic combining ($CodePoint -ge 0x0670 -and $CodePoint -le 0x0670) -or # Arabic combining ($CodePoint -ge 0x06D6 -and $CodePoint -le 0x06DD) -or # Arabic combining ($CodePoint -ge 0x1AB0 -and $CodePoint -le 0x1AFF) -or # Combining marks ($CodePoint -ge 0x1DC0 -and $CodePoint -le 0x1DFF) -or # Combining marks ($CodePoint -ge 0x20D0 -and $CodePoint -le 0x20FF) -or # Combining marks for symbols ($CodePoint -ge 0xFE00 -and $CodePoint -le 0xFE0F) -or # Variation selectors ($CodePoint -ge 0xFE20 -and $CodePoint -le 0xFE2F) -or # Combining half marks ($CodePoint -ge 0x180B -and $CodePoint -le 0x180D) -or # Mongolian variation selectors ($CodePoint -ge 0x200B -and $CodePoint -le 0x200F) -or # Zero-width spaces/joiners $CodePoint -eq 0x00AD -or # Soft hyphen $CodePoint -eq 0x034F -or # Combining grapheme joiner $CodePoint -eq 0x061C -or # Arabic letter mark $CodePoint -eq 0x115F -or $CodePoint -eq 0x1160 -or # Hangul fillers $CodePoint -eq 0x17B4 -or $CodePoint -eq 0x17B5 -or # Khmer vowels ($CodePoint -ge 0xE0100 -and $CodePoint -le 0xE01EF)) { # Variation selectors supplement return 0 } # Wide characters (2 cells) # Emoji ranges if (($CodePoint -ge 0x1F300 -and $CodePoint -le 0x1F5FF) -or # Misc Symbols and Pictographs ($CodePoint -ge 0x1F600 -and $CodePoint -le 0x1F64F) -or # Emoticons ($CodePoint -ge 0x1F680 -and $CodePoint -le 0x1F6FF) -or # Transport and Map Symbols ($CodePoint -ge 0x1F700 -and $CodePoint -le 0x1F77F) -or # Alchemical Symbols ($CodePoint -ge 0x1F780 -and $CodePoint -le 0x1F7FF) -or # Geometric Shapes Extended ($CodePoint -ge 0x1F800 -and $CodePoint -le 0x1F8FF) -or # Supplemental Arrows-C ($CodePoint -ge 0x1F900 -and $CodePoint -le 0x1F9FF) -or # Supplemental Symbols and Pictographs ($CodePoint -ge 0x1FA00 -and $CodePoint -le 0x1FA6F) -or # Chess Symbols ($CodePoint -ge 0x1FA70 -and $CodePoint -le 0x1FAFF) -or # Symbols and Pictographs Extended-A ($CodePoint -ge 0x2600 -and $CodePoint -le 0x26FF) -or # Miscellaneous Symbols ($CodePoint -ge 0x2700 -and $CodePoint -le 0x27BF)) { # Dingbats return 2 } # CJK and other wide ranges if (($CodePoint -ge 0x1100 -and $CodePoint -le 0x115F) -or # Hangul Jamo ($CodePoint -ge 0x2329 -and $CodePoint -le 0x232A) -or # Angle brackets ($CodePoint -ge 0x2E80 -and $CodePoint -le 0x2E99) -or # CJK Radicals Supplement ($CodePoint -ge 0x2E9B -and $CodePoint -le 0x2EF3) -or # CJK Radicals Supplement ($CodePoint -ge 0x2F00 -and $CodePoint -le 0x2FD5) -or # Kangxi Radicals ($CodePoint -ge 0x2FF0 -and $CodePoint -le 0x2FFB) -or # Ideographic Description Characters ($CodePoint -ge 0x3000 -and $CodePoint -le 0x303E) -or # CJK Symbols and Punctuation ($CodePoint -ge 0x3041 -and $CodePoint -le 0x3096) -or # Hiragana ($CodePoint -ge 0x3099 -and $CodePoint -le 0x30FF) -or # Katakana ($CodePoint -ge 0x3105 -and $CodePoint -le 0x312F) -or # Bopomofo ($CodePoint -ge 0x3131 -and $CodePoint -le 0x318E) -or # Hangul Compatibility Jamo ($CodePoint -ge 0x3190 -and $CodePoint -le 0x31E3) -or # CJK Misc ($CodePoint -ge 0x31F0 -and $CodePoint -le 0x321E) -or # Katakana Phonetic Extensions ($CodePoint -ge 0x3220 -and $CodePoint -le 0x3247) -or # Enclosed CJK ($CodePoint -ge 0x3250 -and $CodePoint -le 0x4DBF) -or # CJK Extension A ($CodePoint -ge 0x4E00 -and $CodePoint -le 0xA48C) -or # CJK Unified Ideographs ($CodePoint -ge 0xA490 -and $CodePoint -le 0xA4C6) -or # Yi Radicals ($CodePoint -ge 0xA960 -and $CodePoint -le 0xA97C) -or # Hangul Jamo Extended-A ($CodePoint -ge 0xAC00 -and $CodePoint -le 0xD7A3) -or # Hangul Syllables ($CodePoint -ge 0xD7B0 -and $CodePoint -le 0xD7C6) -or # Hangul Jamo Extended-B ($CodePoint -ge 0xD7CB -and $CodePoint -le 0xD7FB) -or # Hangul Jamo Extended-B ($CodePoint -ge 0xF900 -and $CodePoint -le 0xFAFF) -or # CJK Compatibility Ideographs ($CodePoint -ge 0xFE10 -and $CodePoint -le 0xFE19) -or # Vertical Forms ($CodePoint -ge 0xFE30 -and $CodePoint -le 0xFE6F) -or # CJK Compatibility Forms ($CodePoint -ge 0xFF00 -and $CodePoint -le 0xFF60) -or # Fullwidth Forms ($CodePoint -ge 0xFFE0 -and $CodePoint -le 0xFFE6) -or # Fullwidth symbols ($CodePoint -ge 0x1B000 -and $CodePoint -le 0x1B2FF) -or # Kana Supplement/Extended ($CodePoint -ge 0x1F200 -and $CodePoint -le 0x1F251) -or # Enclosed Ideographic Supplement ($CodePoint -ge 0x20000 -and $CodePoint -le 0x2FFFD) -or # CJK Extensions ($CodePoint -ge 0x30000 -and $CodePoint -le 0x3FFFD)) { # CJK Extensions return 2 } # Ambiguous Width characters - configurable behavior # Box-drawing characters (U+2500 to U+257F) if ($CodePoint -ge 0x2500 -and $CodePoint -le 0x257F) { if ($TreatAmbiguousAsWide) { return 2 } else { return 1 } } # Other common ambiguous characters $ambiguousChars = @( 0x00A1, 0x00A4, 0x00A7, 0x00A8, 0x00AA, 0x00AD, 0x00AE, 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BC, 0x00BD, 0x00BE, 0x00BF, 0x00C6, 0x00D0, 0x00D7, 0x00D8, 0x00DE, 0x00DF, 0x00E6, 0x00F0, 0x00F7, 0x00F8, 0x00FE, 0x0101, 0x0111, 0x0113, 0x011B, 0x0126, 0x0127, 0x012B, 0x0131, 0x0132, 0x0133, 0x0138, 0x013F, 0x0140, 0x0141, 0x0142, 0x0144, 0x0148, 0x0149, 0x014A, 0x014B, 0x014D, 0x0152, 0x0153, 0x0166, 0x0167, 0x016B, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01D6, 0x01D8, 0x01DA, 0x01DC, 0x0251, 0x0261, 0x02C4, 0x02C7, 0x02C9, 0x02CA, 0x02CB, 0x02CD, 0x02D0, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DD, 0x02DF, 0x0391, 0x03A9, 0x03B1, 0x03C9, 0x0401, 0x0451, 0x2010, 0x2013, 0x2014, 0x2015, 0x2016, 0x2018, 0x2019, 0x201C, 0x201D, 0x2020, 0x2021, 0x2022, 0x2024, 0x2025, 0x2026, 0x2027, 0x2030, 0x2032, 0x2033, 0x2035, 0x203B, 0x203E, 0x2074, 0x207F, 0x2081, 0x2084, 0x20AC, 0x2103, 0x2105, 0x2109, 0x2113, 0x2116, 0x2121, 0x2122, 0x2126, 0x212B, 0x2153, 0x2154, 0x215B, 0x215C, 0x215D, 0x215E, 0x2160, 0x216B, 0x2170, 0x2179, 0x2189, 0x2190, 0x2194, 0x2195, 0x2199, 0x21B8, 0x21B9, 0x21D2, 0x21D4, 0x21E7, 0x2200, 0x2202, 0x2203, 0x2207, 0x2208, 0x220B, 0x220F, 0x2211, 0x2215, 0x221A, 0x221D, 0x221F, 0x2220, 0x2223, 0x2225, 0x2227, 0x2228, 0x2229, 0x222A, 0x222B, 0x222C, 0x222E, 0x2234, 0x2235, 0x2236, 0x2237, 0x223C, 0x223D, 0x2248, 0x224C, 0x2252, 0x2260, 0x2261, 0x2264, 0x2265, 0x2266, 0x2267, 0x226A, 0x226B, 0x226E, 0x226F, 0x2282, 0x2283, 0x2286, 0x2287, 0x2295, 0x2299, 0x22A5, 0x22BF, 0x2312, 0x2460, 0x24EA, 0x254B, 0x2550, 0x2573, 0x2580, 0x258F, 0x2592, 0x2595, 0x25A0, 0x25A1, 0x25A3, 0x25A9, 0x25B2, 0x25B3, 0x25B6, 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C8, 0x25CB, 0x25CE, 0x25CF, 0x25D0, 0x25D1, 0x25E2, 0x25E5, 0x25EF, 0x2605, 0x2606, 0x2609, 0x260E, 0x260F, 0x261C, 0x261E, 0x2640, 0x2642, 0x2660, 0x2661, 0x2663, 0x2665, 0x2667, 0x266A, 0x266D, 0x266F, 0x269E, 0x269F, 0x26BF, 0x26C6, 0x26C7, 0x26CE, 0x26CF, 0x26E2, 0x26EF, 0x26F1, 0x26F4, 0x26F5, 0x26F7, 0x26FA, 0x26FD, 0x2705, 0x270A, 0x270B, 0x2728, 0x274C, 0x274E, 0x2753, 0x2754, 0x2755, 0x2757, 0x2795, 0x2796, 0x2797, 0x27B0, 0x27BF, 0x2B1B, 0x2B1C, 0x2B50, 0x2B55, 0x2B56, 0x2B59 ) if ($ambiguousChars -contains $CodePoint) { if ($TreatAmbiguousAsWide) { return 2 } else { return 1 } } # Additional ambiguous ranges if (($CodePoint -ge 0x2580 -and $CodePoint -le 0x258F) -or # Block Elements (some) ($CodePoint -ge 0x2592 -and $CodePoint -le 0x2595)) { # Block Elements (some) if ($TreatAmbiguousAsWide) { return 2 } else { return 1 } } # Control characters (non-printable) if ($CodePoint -lt 0x20 -or ($CodePoint -ge 0x7F -and $CodePoint -lt 0xA0)) { return 0 } # Default: narrow (1 cell) - includes ASCII, Latin, etc. return 1 } } process { # Handle empty strings if ([string]::IsNullOrEmpty($Text)) { return 0 } $totalWidth = 0 # Use StringInfo to properly handle grapheme clusters (emoji with modifiers, etc.) $textElementEnumerator = [System.Globalization.StringInfo]::GetTextElementEnumerator($Text) while ($textElementEnumerator.MoveNext()) { $textElement = $textElementEnumerator.GetTextElement() # Get the codepoint of the first character in the text element if ($textElement.Length -eq 1) { $codepoint = [int][char]$textElement[0] } elseif ($textElement.Length -ge 2) { # Handle surrogate pairs (emoji, supplementary characters) try { $codepoint = [char]::ConvertToUtf32($textElement, 0) } catch { # If conversion fails, treat as narrow $codepoint = [int][char]$textElement[0] } } else { continue } $charWidth = Get-CharacterWidth -CodePoint $codepoint -TreatAmbiguousAsWide:$AmbiguousAsWide.IsPresent $totalWidth += $charWidth } return $totalWidth } } |